Index: head/sys/boot/fdt/dts/riscv/spike.dts =================================================================== --- head/sys/boot/fdt/dts/riscv/spike.dts (revision 303659) +++ head/sys/boot/fdt/dts/riscv/spike.dts (revision 303660) @@ -1,109 +1,110 @@ /*- - * Copyright (c) 2015 Ruslan Bukin + * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /dts-v1/; / { - model = "UC Berkeley Spike Simulator RV64I"; - compatible = "riscv,rv64i"; + model = "UC Berkeley Spike Simulator RV64"; + compatible = "riscv,rv64"; #address-cells = <1>; #size-cells = <1>; #interrupt-cells = <1>; cpus { #address-cells = <1>; #size-cells = <0>; cpu@0 { device_type = "cpu"; - compatible = "riscv,rv64i"; - reg = <0x40002000>; + compatible = "riscv,rv64"; + reg = <0x40001000>; }; cpu@1 { device_type = "cpu"; - compatible = "riscv,rv64i"; - reg = <0x4000a000>; + compatible = "riscv,rv64"; + reg = <0x40002000>; }; }; aliases { console0 = &console0; }; memory { device_type = "memory"; - reg = <0x0 0x40000000>; /* 1GB at 0x0 */ + reg = <0x80000000 0x40000000>; /* 1GB at 0x80000000 */ }; soc { - #address-cells = <2>; - #size-cells = <2>; + #address-cells = <1>; + #size-cells = <1>; #interrupt-cells = <1>; compatible = "simple-bus"; ranges; pic0: pic@0 { compatible = "riscv,pic"; interrupt-controller; }; timer0: timer@0 { compatible = "riscv,timer"; - interrupts = < 1 >; + reg = < 0x40000000 0x100 >; + interrupts = < 5 >; interrupt-parent = < &pic0 >; clock-frequency = < 1000000 >; }; htif0: htif@0 { compatible = "riscv,htif"; - interrupts = < 0 >; + interrupts = < 1 >; interrupt-parent = < &pic0 >; console0: console@0 { compatible = "htif,console"; status = "okay"; }; }; }; chosen { bootargs = "-v"; stdin = "console0"; stdout = "console0"; }; }; Index: head/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace.h =================================================================== --- head/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace.h (revision 303659) +++ head/sys/cddl/contrib/opensolaris/uts/common/sys/dtrace.h (revision 303660) @@ -1,2514 +1,2514 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2007 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * Copyright (c) 2013, Joyent, Inc. All rights reserved. * Copyright (c) 2013 by Delphix. All rights reserved. */ #ifndef _SYS_DTRACE_H #define _SYS_DTRACE_H #pragma ident "%Z%%M% %I% %E% SMI" #ifdef __cplusplus extern "C" { #endif /* * DTrace Dynamic Tracing Software: Kernel Interfaces * * Note: The contents of this file are private to the implementation of the * Solaris system and DTrace subsystem and are subject to change at any time * without notice. Applications and drivers using these interfaces will fail * to run on future releases. These interfaces should not be used for any * purpose except those expressly outlined in dtrace(7D) and libdtrace(3LIB). * Please refer to the "Solaris Dynamic Tracing Guide" for more information. */ #ifndef _ASM #include #include #include #ifdef illumos #include #else #include #include #include #include #include typedef int model_t; #endif #include #ifdef illumos #include #include #else #include #endif /* * DTrace Universal Constants and Typedefs */ #define DTRACE_CPUALL -1 /* all CPUs */ #define DTRACE_IDNONE 0 /* invalid probe identifier */ #define DTRACE_EPIDNONE 0 /* invalid enabled probe identifier */ #define DTRACE_AGGIDNONE 0 /* invalid aggregation identifier */ #define DTRACE_AGGVARIDNONE 0 /* invalid aggregation variable ID */ #define DTRACE_CACHEIDNONE 0 /* invalid predicate cache */ #define DTRACE_PROVNONE 0 /* invalid provider identifier */ #define DTRACE_METAPROVNONE 0 /* invalid meta-provider identifier */ #define DTRACE_ARGNONE -1 /* invalid argument index */ #define DTRACE_PROVNAMELEN 64 #define DTRACE_MODNAMELEN 64 #define DTRACE_FUNCNAMELEN 192 #define DTRACE_NAMELEN 64 #define DTRACE_FULLNAMELEN (DTRACE_PROVNAMELEN + DTRACE_MODNAMELEN + \ DTRACE_FUNCNAMELEN + DTRACE_NAMELEN + 4) #define DTRACE_ARGTYPELEN 128 typedef uint32_t dtrace_id_t; /* probe identifier */ typedef uint32_t dtrace_epid_t; /* enabled probe identifier */ typedef uint32_t dtrace_aggid_t; /* aggregation identifier */ typedef int64_t dtrace_aggvarid_t; /* aggregation variable identifier */ typedef uint16_t dtrace_actkind_t; /* action kind */ typedef int64_t dtrace_optval_t; /* option value */ typedef uint32_t dtrace_cacheid_t; /* predicate cache identifier */ typedef enum dtrace_probespec { DTRACE_PROBESPEC_NONE = -1, DTRACE_PROBESPEC_PROVIDER = 0, DTRACE_PROBESPEC_MOD, DTRACE_PROBESPEC_FUNC, DTRACE_PROBESPEC_NAME } dtrace_probespec_t; /* * DTrace Intermediate Format (DIF) * * The following definitions describe the DTrace Intermediate Format (DIF), a * a RISC-like instruction set and program encoding used to represent * predicates and actions that can be bound to DTrace probes. The constants * below defining the number of available registers are suggested minimums; the * compiler should use DTRACEIOC_CONF to dynamically obtain the number of * registers provided by the current DTrace implementation. */ #define DIF_VERSION_1 1 /* DIF version 1: Solaris 10 Beta */ #define DIF_VERSION_2 2 /* DIF version 2: Solaris 10 FCS */ #define DIF_VERSION DIF_VERSION_2 /* latest DIF instruction set version */ #define DIF_DIR_NREGS 8 /* number of DIF integer registers */ #define DIF_DTR_NREGS 8 /* number of DIF tuple registers */ #define DIF_OP_OR 1 /* or r1, r2, rd */ #define DIF_OP_XOR 2 /* xor r1, r2, rd */ #define DIF_OP_AND 3 /* and r1, r2, rd */ #define DIF_OP_SLL 4 /* sll r1, r2, rd */ #define DIF_OP_SRL 5 /* srl r1, r2, rd */ #define DIF_OP_SUB 6 /* sub r1, r2, rd */ #define DIF_OP_ADD 7 /* add r1, r2, rd */ #define DIF_OP_MUL 8 /* mul r1, r2, rd */ #define DIF_OP_SDIV 9 /* sdiv r1, r2, rd */ #define DIF_OP_UDIV 10 /* udiv r1, r2, rd */ #define DIF_OP_SREM 11 /* srem r1, r2, rd */ #define DIF_OP_UREM 12 /* urem r1, r2, rd */ #define DIF_OP_NOT 13 /* not r1, rd */ #define DIF_OP_MOV 14 /* mov r1, rd */ #define DIF_OP_CMP 15 /* cmp r1, r2 */ #define DIF_OP_TST 16 /* tst r1 */ #define DIF_OP_BA 17 /* ba label */ #define DIF_OP_BE 18 /* be label */ #define DIF_OP_BNE 19 /* bne label */ #define DIF_OP_BG 20 /* bg label */ #define DIF_OP_BGU 21 /* bgu label */ #define DIF_OP_BGE 22 /* bge label */ #define DIF_OP_BGEU 23 /* bgeu label */ #define DIF_OP_BL 24 /* bl label */ #define DIF_OP_BLU 25 /* blu label */ #define DIF_OP_BLE 26 /* ble label */ #define DIF_OP_BLEU 27 /* bleu label */ #define DIF_OP_LDSB 28 /* ldsb [r1], rd */ #define DIF_OP_LDSH 29 /* ldsh [r1], rd */ #define DIF_OP_LDSW 30 /* ldsw [r1], rd */ #define DIF_OP_LDUB 31 /* ldub [r1], rd */ #define DIF_OP_LDUH 32 /* lduh [r1], rd */ #define DIF_OP_LDUW 33 /* lduw [r1], rd */ #define DIF_OP_LDX 34 /* ldx [r1], rd */ #define DIF_OP_RET 35 /* ret rd */ #define DIF_OP_NOP 36 /* nop */ #define DIF_OP_SETX 37 /* setx intindex, rd */ #define DIF_OP_SETS 38 /* sets strindex, rd */ #define DIF_OP_SCMP 39 /* scmp r1, r2 */ #define DIF_OP_LDGA 40 /* ldga var, ri, rd */ #define DIF_OP_LDGS 41 /* ldgs var, rd */ #define DIF_OP_STGS 42 /* stgs var, rs */ #define DIF_OP_LDTA 43 /* ldta var, ri, rd */ #define DIF_OP_LDTS 44 /* ldts var, rd */ #define DIF_OP_STTS 45 /* stts var, rs */ #define DIF_OP_SRA 46 /* sra r1, r2, rd */ #define DIF_OP_CALL 47 /* call subr, rd */ #define DIF_OP_PUSHTR 48 /* pushtr type, rs, rr */ #define DIF_OP_PUSHTV 49 /* pushtv type, rs, rv */ #define DIF_OP_POPTS 50 /* popts */ #define DIF_OP_FLUSHTS 51 /* flushts */ #define DIF_OP_LDGAA 52 /* ldgaa var, rd */ #define DIF_OP_LDTAA 53 /* ldtaa var, rd */ #define DIF_OP_STGAA 54 /* stgaa var, rs */ #define DIF_OP_STTAA 55 /* sttaa var, rs */ #define DIF_OP_LDLS 56 /* ldls var, rd */ #define DIF_OP_STLS 57 /* stls var, rs */ #define DIF_OP_ALLOCS 58 /* allocs r1, rd */ #define DIF_OP_COPYS 59 /* copys r1, r2, rd */ #define DIF_OP_STB 60 /* stb r1, [rd] */ #define DIF_OP_STH 61 /* sth r1, [rd] */ #define DIF_OP_STW 62 /* stw r1, [rd] */ #define DIF_OP_STX 63 /* stx r1, [rd] */ #define DIF_OP_ULDSB 64 /* uldsb [r1], rd */ #define DIF_OP_ULDSH 65 /* uldsh [r1], rd */ #define DIF_OP_ULDSW 66 /* uldsw [r1], rd */ #define DIF_OP_ULDUB 67 /* uldub [r1], rd */ #define DIF_OP_ULDUH 68 /* ulduh [r1], rd */ #define DIF_OP_ULDUW 69 /* ulduw [r1], rd */ #define DIF_OP_ULDX 70 /* uldx [r1], rd */ #define DIF_OP_RLDSB 71 /* rldsb [r1], rd */ #define DIF_OP_RLDSH 72 /* rldsh [r1], rd */ #define DIF_OP_RLDSW 73 /* rldsw [r1], rd */ #define DIF_OP_RLDUB 74 /* rldub [r1], rd */ #define DIF_OP_RLDUH 75 /* rlduh [r1], rd */ #define DIF_OP_RLDUW 76 /* rlduw [r1], rd */ #define DIF_OP_RLDX 77 /* rldx [r1], rd */ #define DIF_OP_XLATE 78 /* xlate xlrindex, rd */ #define DIF_OP_XLARG 79 /* xlarg xlrindex, rd */ #define DIF_INTOFF_MAX 0xffff /* highest integer table offset */ #define DIF_STROFF_MAX 0xffff /* highest string table offset */ #define DIF_REGISTER_MAX 0xff /* highest register number */ #define DIF_VARIABLE_MAX 0xffff /* highest variable identifier */ #define DIF_SUBROUTINE_MAX 0xffff /* highest subroutine code */ #define DIF_VAR_ARRAY_MIN 0x0000 /* lowest numbered array variable */ #define DIF_VAR_ARRAY_UBASE 0x0080 /* lowest user-defined array */ #define DIF_VAR_ARRAY_MAX 0x00ff /* highest numbered array variable */ #define DIF_VAR_OTHER_MIN 0x0100 /* lowest numbered scalar or assc */ #define DIF_VAR_OTHER_UBASE 0x0500 /* lowest user-defined scalar or assc */ #define DIF_VAR_OTHER_MAX 0xffff /* highest numbered scalar or assc */ #define DIF_VAR_ARGS 0x0000 /* arguments array */ #define DIF_VAR_REGS 0x0001 /* registers array */ #define DIF_VAR_UREGS 0x0002 /* user registers array */ #define DIF_VAR_CURTHREAD 0x0100 /* thread pointer */ #define DIF_VAR_TIMESTAMP 0x0101 /* timestamp */ #define DIF_VAR_VTIMESTAMP 0x0102 /* virtual timestamp */ #define DIF_VAR_IPL 0x0103 /* interrupt priority level */ #define DIF_VAR_EPID 0x0104 /* enabled probe ID */ #define DIF_VAR_ID 0x0105 /* probe ID */ #define DIF_VAR_ARG0 0x0106 /* first argument */ #define DIF_VAR_ARG1 0x0107 /* second argument */ #define DIF_VAR_ARG2 0x0108 /* third argument */ #define DIF_VAR_ARG3 0x0109 /* fourth argument */ #define DIF_VAR_ARG4 0x010a /* fifth argument */ #define DIF_VAR_ARG5 0x010b /* sixth argument */ #define DIF_VAR_ARG6 0x010c /* seventh argument */ #define DIF_VAR_ARG7 0x010d /* eighth argument */ #define DIF_VAR_ARG8 0x010e /* ninth argument */ #define DIF_VAR_ARG9 0x010f /* tenth argument */ #define DIF_VAR_STACKDEPTH 0x0110 /* stack depth */ #define DIF_VAR_CALLER 0x0111 /* caller */ #define DIF_VAR_PROBEPROV 0x0112 /* probe provider */ #define DIF_VAR_PROBEMOD 0x0113 /* probe module */ #define DIF_VAR_PROBEFUNC 0x0114 /* probe function */ #define DIF_VAR_PROBENAME 0x0115 /* probe name */ #define DIF_VAR_PID 0x0116 /* process ID */ #define DIF_VAR_TID 0x0117 /* (per-process) thread ID */ #define DIF_VAR_EXECNAME 0x0118 /* name of executable */ #define DIF_VAR_ZONENAME 0x0119 /* zone name associated with process */ #define DIF_VAR_WALLTIMESTAMP 0x011a /* wall-clock timestamp */ #define DIF_VAR_USTACKDEPTH 0x011b /* user-land stack depth */ #define DIF_VAR_UCALLER 0x011c /* user-level caller */ #define DIF_VAR_PPID 0x011d /* parent process ID */ #define DIF_VAR_UID 0x011e /* process user ID */ #define DIF_VAR_GID 0x011f /* process group ID */ #define DIF_VAR_ERRNO 0x0120 /* thread errno */ #define DIF_VAR_EXECARGS 0x0121 /* process arguments */ #ifndef illumos #define DIF_VAR_CPU 0x0200 #endif #define DIF_SUBR_RAND 0 #define DIF_SUBR_MUTEX_OWNED 1 #define DIF_SUBR_MUTEX_OWNER 2 #define DIF_SUBR_MUTEX_TYPE_ADAPTIVE 3 #define DIF_SUBR_MUTEX_TYPE_SPIN 4 #define DIF_SUBR_RW_READ_HELD 5 #define DIF_SUBR_RW_WRITE_HELD 6 #define DIF_SUBR_RW_ISWRITER 7 #define DIF_SUBR_COPYIN 8 #define DIF_SUBR_COPYINSTR 9 #define DIF_SUBR_SPECULATION 10 #define DIF_SUBR_PROGENYOF 11 #define DIF_SUBR_STRLEN 12 #define DIF_SUBR_COPYOUT 13 #define DIF_SUBR_COPYOUTSTR 14 #define DIF_SUBR_ALLOCA 15 #define DIF_SUBR_BCOPY 16 #define DIF_SUBR_COPYINTO 17 #define DIF_SUBR_MSGDSIZE 18 #define DIF_SUBR_MSGSIZE 19 #define DIF_SUBR_GETMAJOR 20 #define DIF_SUBR_GETMINOR 21 #define DIF_SUBR_DDI_PATHNAME 22 #define DIF_SUBR_STRJOIN 23 #define DIF_SUBR_LLTOSTR 24 #define DIF_SUBR_BASENAME 25 #define DIF_SUBR_DIRNAME 26 #define DIF_SUBR_CLEANPATH 27 #define DIF_SUBR_STRCHR 28 #define DIF_SUBR_STRRCHR 29 #define DIF_SUBR_STRSTR 30 #define DIF_SUBR_STRTOK 31 #define DIF_SUBR_SUBSTR 32 #define DIF_SUBR_INDEX 33 #define DIF_SUBR_RINDEX 34 #define DIF_SUBR_HTONS 35 #define DIF_SUBR_HTONL 36 #define DIF_SUBR_HTONLL 37 #define DIF_SUBR_NTOHS 38 #define DIF_SUBR_NTOHL 39 #define DIF_SUBR_NTOHLL 40 #define DIF_SUBR_INET_NTOP 41 #define DIF_SUBR_INET_NTOA 42 #define DIF_SUBR_INET_NTOA6 43 #define DIF_SUBR_TOUPPER 44 #define DIF_SUBR_TOLOWER 45 #define DIF_SUBR_MEMREF 46 #define DIF_SUBR_TYPEREF 47 #define DIF_SUBR_SX_SHARED_HELD 48 #define DIF_SUBR_SX_EXCLUSIVE_HELD 49 #define DIF_SUBR_SX_ISEXCLUSIVE 50 #define DIF_SUBR_MEMSTR 51 #define DIF_SUBR_GETF 52 #define DIF_SUBR_JSON 53 #define DIF_SUBR_STRTOLL 54 #define DIF_SUBR_MAX 54 /* max subroutine value */ typedef uint32_t dif_instr_t; #define DIF_INSTR_OP(i) (((i) >> 24) & 0xff) #define DIF_INSTR_R1(i) (((i) >> 16) & 0xff) #define DIF_INSTR_R2(i) (((i) >> 8) & 0xff) #define DIF_INSTR_RD(i) ((i) & 0xff) #define DIF_INSTR_RS(i) ((i) & 0xff) #define DIF_INSTR_LABEL(i) ((i) & 0xffffff) #define DIF_INSTR_VAR(i) (((i) >> 8) & 0xffff) #define DIF_INSTR_INTEGER(i) (((i) >> 8) & 0xffff) #define DIF_INSTR_STRING(i) (((i) >> 8) & 0xffff) #define DIF_INSTR_SUBR(i) (((i) >> 8) & 0xffff) #define DIF_INSTR_TYPE(i) (((i) >> 16) & 0xff) #define DIF_INSTR_XLREF(i) (((i) >> 8) & 0xffff) #define DIF_INSTR_FMT(op, r1, r2, d) \ (((op) << 24) | ((r1) << 16) | ((r2) << 8) | (d)) #define DIF_INSTR_NOT(r1, d) (DIF_INSTR_FMT(DIF_OP_NOT, r1, 0, d)) #define DIF_INSTR_MOV(r1, d) (DIF_INSTR_FMT(DIF_OP_MOV, r1, 0, d)) #define DIF_INSTR_CMP(op, r1, r2) (DIF_INSTR_FMT(op, r1, r2, 0)) #define DIF_INSTR_TST(r1) (DIF_INSTR_FMT(DIF_OP_TST, r1, 0, 0)) #define DIF_INSTR_BRANCH(op, label) (((op) << 24) | (label)) #define DIF_INSTR_LOAD(op, r1, d) (DIF_INSTR_FMT(op, r1, 0, d)) #define DIF_INSTR_STORE(op, r1, d) (DIF_INSTR_FMT(op, r1, 0, d)) #define DIF_INSTR_SETX(i, d) ((DIF_OP_SETX << 24) | ((i) << 8) | (d)) #define DIF_INSTR_SETS(s, d) ((DIF_OP_SETS << 24) | ((s) << 8) | (d)) #define DIF_INSTR_RET(d) (DIF_INSTR_FMT(DIF_OP_RET, 0, 0, d)) #define DIF_INSTR_NOP (DIF_OP_NOP << 24) #define DIF_INSTR_LDA(op, v, r, d) (DIF_INSTR_FMT(op, v, r, d)) #define DIF_INSTR_LDV(op, v, d) (((op) << 24) | ((v) << 8) | (d)) #define DIF_INSTR_STV(op, v, rs) (((op) << 24) | ((v) << 8) | (rs)) #define DIF_INSTR_CALL(s, d) ((DIF_OP_CALL << 24) | ((s) << 8) | (d)) #define DIF_INSTR_PUSHTS(op, t, r2, rs) (DIF_INSTR_FMT(op, t, r2, rs)) #define DIF_INSTR_POPTS (DIF_OP_POPTS << 24) #define DIF_INSTR_FLUSHTS (DIF_OP_FLUSHTS << 24) #define DIF_INSTR_ALLOCS(r1, d) (DIF_INSTR_FMT(DIF_OP_ALLOCS, r1, 0, d)) #define DIF_INSTR_COPYS(r1, r2, d) (DIF_INSTR_FMT(DIF_OP_COPYS, r1, r2, d)) #define DIF_INSTR_XLATE(op, r, d) (((op) << 24) | ((r) << 8) | (d)) #define DIF_REG_R0 0 /* %r0 is always set to zero */ /* * A DTrace Intermediate Format Type (DIF Type) is used to represent the types * of variables, function and associative array arguments, and the return type * for each DIF object (shown below). It contains a description of the type, * its size in bytes, and a module identifier. */ typedef struct dtrace_diftype { uint8_t dtdt_kind; /* type kind (see below) */ uint8_t dtdt_ckind; /* type kind in CTF */ uint8_t dtdt_flags; /* type flags (see below) */ uint8_t dtdt_pad; /* reserved for future use */ uint32_t dtdt_size; /* type size in bytes (unless string) */ } dtrace_diftype_t; #define DIF_TYPE_CTF 0 /* type is a CTF type */ #define DIF_TYPE_STRING 1 /* type is a D string */ #define DIF_TF_BYREF 0x1 /* type is passed by reference */ #define DIF_TF_BYUREF 0x2 /* user type is passed by reference */ /* * A DTrace Intermediate Format variable record is used to describe each of the * variables referenced by a given DIF object. It contains an integer variable * identifier along with variable scope and properties, as shown below. The * size of this structure must be sizeof (int) aligned. */ typedef struct dtrace_difv { uint32_t dtdv_name; /* variable name index in dtdo_strtab */ uint32_t dtdv_id; /* variable reference identifier */ uint8_t dtdv_kind; /* variable kind (see below) */ uint8_t dtdv_scope; /* variable scope (see below) */ uint16_t dtdv_flags; /* variable flags (see below) */ dtrace_diftype_t dtdv_type; /* variable type (see above) */ } dtrace_difv_t; #define DIFV_KIND_ARRAY 0 /* variable is an array of quantities */ #define DIFV_KIND_SCALAR 1 /* variable is a scalar quantity */ #define DIFV_SCOPE_GLOBAL 0 /* variable has global scope */ #define DIFV_SCOPE_THREAD 1 /* variable has thread scope */ #define DIFV_SCOPE_LOCAL 2 /* variable has local scope */ #define DIFV_F_REF 0x1 /* variable is referenced by DIFO */ #define DIFV_F_MOD 0x2 /* variable is written by DIFO */ /* * DTrace Actions * * The upper byte determines the class of the action; the low bytes determines * the specific action within that class. The classes of actions are as * follows: * * [ no class ] <= May record process- or kernel-related data * DTRACEACT_PROC <= Only records process-related data * DTRACEACT_PROC_DESTRUCTIVE <= Potentially destructive to processes * DTRACEACT_KERNEL <= Only records kernel-related data * DTRACEACT_KERNEL_DESTRUCTIVE <= Potentially destructive to the kernel * DTRACEACT_SPECULATIVE <= Speculation-related action * DTRACEACT_AGGREGATION <= Aggregating action */ #define DTRACEACT_NONE 0 /* no action */ #define DTRACEACT_DIFEXPR 1 /* action is DIF expression */ #define DTRACEACT_EXIT 2 /* exit() action */ #define DTRACEACT_PRINTF 3 /* printf() action */ #define DTRACEACT_PRINTA 4 /* printa() action */ #define DTRACEACT_LIBACT 5 /* library-controlled action */ #define DTRACEACT_TRACEMEM 6 /* tracemem() action */ #define DTRACEACT_TRACEMEM_DYNSIZE 7 /* dynamic tracemem() size */ #define DTRACEACT_PRINTM 8 /* printm() action (BSD) */ #define DTRACEACT_PRINTT 9 /* printt() action (BSD) */ #define DTRACEACT_PROC 0x0100 #define DTRACEACT_USTACK (DTRACEACT_PROC + 1) #define DTRACEACT_JSTACK (DTRACEACT_PROC + 2) #define DTRACEACT_USYM (DTRACEACT_PROC + 3) #define DTRACEACT_UMOD (DTRACEACT_PROC + 4) #define DTRACEACT_UADDR (DTRACEACT_PROC + 5) #define DTRACEACT_PROC_DESTRUCTIVE 0x0200 #define DTRACEACT_STOP (DTRACEACT_PROC_DESTRUCTIVE + 1) #define DTRACEACT_RAISE (DTRACEACT_PROC_DESTRUCTIVE + 2) #define DTRACEACT_SYSTEM (DTRACEACT_PROC_DESTRUCTIVE + 3) #define DTRACEACT_FREOPEN (DTRACEACT_PROC_DESTRUCTIVE + 4) #define DTRACEACT_PROC_CONTROL 0x0300 #define DTRACEACT_KERNEL 0x0400 #define DTRACEACT_STACK (DTRACEACT_KERNEL + 1) #define DTRACEACT_SYM (DTRACEACT_KERNEL + 2) #define DTRACEACT_MOD (DTRACEACT_KERNEL + 3) #define DTRACEACT_KERNEL_DESTRUCTIVE 0x0500 #define DTRACEACT_BREAKPOINT (DTRACEACT_KERNEL_DESTRUCTIVE + 1) #define DTRACEACT_PANIC (DTRACEACT_KERNEL_DESTRUCTIVE + 2) #define DTRACEACT_CHILL (DTRACEACT_KERNEL_DESTRUCTIVE + 3) #define DTRACEACT_SPECULATIVE 0x0600 #define DTRACEACT_SPECULATE (DTRACEACT_SPECULATIVE + 1) #define DTRACEACT_COMMIT (DTRACEACT_SPECULATIVE + 2) #define DTRACEACT_DISCARD (DTRACEACT_SPECULATIVE + 3) #define DTRACEACT_CLASS(x) ((x) & 0xff00) #define DTRACEACT_ISDESTRUCTIVE(x) \ (DTRACEACT_CLASS(x) == DTRACEACT_PROC_DESTRUCTIVE || \ DTRACEACT_CLASS(x) == DTRACEACT_KERNEL_DESTRUCTIVE) #define DTRACEACT_ISSPECULATIVE(x) \ (DTRACEACT_CLASS(x) == DTRACEACT_SPECULATIVE) #define DTRACEACT_ISPRINTFLIKE(x) \ ((x) == DTRACEACT_PRINTF || (x) == DTRACEACT_PRINTA || \ (x) == DTRACEACT_SYSTEM || (x) == DTRACEACT_FREOPEN) /* * DTrace Aggregating Actions * * These are functions f(x) for which the following is true: * * f(f(x_0) U f(x_1) U ... U f(x_n)) = f(x_0 U x_1 U ... U x_n) * * where x_n is a set of arbitrary data. Aggregating actions are in their own * DTrace action class, DTTRACEACT_AGGREGATION. The macros provided here allow * for easier processing of the aggregation argument and data payload for a few * aggregating actions (notably: quantize(), lquantize(), and ustack()). */ #define DTRACEACT_AGGREGATION 0x0700 #define DTRACEAGG_COUNT (DTRACEACT_AGGREGATION + 1) #define DTRACEAGG_MIN (DTRACEACT_AGGREGATION + 2) #define DTRACEAGG_MAX (DTRACEACT_AGGREGATION + 3) #define DTRACEAGG_AVG (DTRACEACT_AGGREGATION + 4) #define DTRACEAGG_SUM (DTRACEACT_AGGREGATION + 5) #define DTRACEAGG_STDDEV (DTRACEACT_AGGREGATION + 6) #define DTRACEAGG_QUANTIZE (DTRACEACT_AGGREGATION + 7) #define DTRACEAGG_LQUANTIZE (DTRACEACT_AGGREGATION + 8) #define DTRACEAGG_LLQUANTIZE (DTRACEACT_AGGREGATION + 9) #define DTRACEACT_ISAGG(x) \ (DTRACEACT_CLASS(x) == DTRACEACT_AGGREGATION) #define DTRACE_QUANTIZE_NBUCKETS \ (((sizeof (uint64_t) * NBBY) - 1) * 2 + 1) #define DTRACE_QUANTIZE_ZEROBUCKET ((sizeof (uint64_t) * NBBY) - 1) #define DTRACE_QUANTIZE_BUCKETVAL(buck) \ (int64_t)((buck) < DTRACE_QUANTIZE_ZEROBUCKET ? \ -(1LL << (DTRACE_QUANTIZE_ZEROBUCKET - 1 - (buck))) : \ (buck) == DTRACE_QUANTIZE_ZEROBUCKET ? 0 : \ 1LL << ((buck) - DTRACE_QUANTIZE_ZEROBUCKET - 1)) #define DTRACE_LQUANTIZE_STEPSHIFT 48 #define DTRACE_LQUANTIZE_STEPMASK ((uint64_t)UINT16_MAX << 48) #define DTRACE_LQUANTIZE_LEVELSHIFT 32 #define DTRACE_LQUANTIZE_LEVELMASK ((uint64_t)UINT16_MAX << 32) #define DTRACE_LQUANTIZE_BASESHIFT 0 #define DTRACE_LQUANTIZE_BASEMASK UINT32_MAX #define DTRACE_LQUANTIZE_STEP(x) \ (uint16_t)(((x) & DTRACE_LQUANTIZE_STEPMASK) >> \ DTRACE_LQUANTIZE_STEPSHIFT) #define DTRACE_LQUANTIZE_LEVELS(x) \ (uint16_t)(((x) & DTRACE_LQUANTIZE_LEVELMASK) >> \ DTRACE_LQUANTIZE_LEVELSHIFT) #define DTRACE_LQUANTIZE_BASE(x) \ (int32_t)(((x) & DTRACE_LQUANTIZE_BASEMASK) >> \ DTRACE_LQUANTIZE_BASESHIFT) #define DTRACE_LLQUANTIZE_FACTORSHIFT 48 #define DTRACE_LLQUANTIZE_FACTORMASK ((uint64_t)UINT16_MAX << 48) #define DTRACE_LLQUANTIZE_LOWSHIFT 32 #define DTRACE_LLQUANTIZE_LOWMASK ((uint64_t)UINT16_MAX << 32) #define DTRACE_LLQUANTIZE_HIGHSHIFT 16 #define DTRACE_LLQUANTIZE_HIGHMASK ((uint64_t)UINT16_MAX << 16) #define DTRACE_LLQUANTIZE_NSTEPSHIFT 0 #define DTRACE_LLQUANTIZE_NSTEPMASK UINT16_MAX #define DTRACE_LLQUANTIZE_FACTOR(x) \ (uint16_t)(((x) & DTRACE_LLQUANTIZE_FACTORMASK) >> \ DTRACE_LLQUANTIZE_FACTORSHIFT) #define DTRACE_LLQUANTIZE_LOW(x) \ (uint16_t)(((x) & DTRACE_LLQUANTIZE_LOWMASK) >> \ DTRACE_LLQUANTIZE_LOWSHIFT) #define DTRACE_LLQUANTIZE_HIGH(x) \ (uint16_t)(((x) & DTRACE_LLQUANTIZE_HIGHMASK) >> \ DTRACE_LLQUANTIZE_HIGHSHIFT) #define DTRACE_LLQUANTIZE_NSTEP(x) \ (uint16_t)(((x) & DTRACE_LLQUANTIZE_NSTEPMASK) >> \ DTRACE_LLQUANTIZE_NSTEPSHIFT) #define DTRACE_USTACK_NFRAMES(x) (uint32_t)((x) & UINT32_MAX) #define DTRACE_USTACK_STRSIZE(x) (uint32_t)((x) >> 32) #define DTRACE_USTACK_ARG(x, y) \ ((((uint64_t)(y)) << 32) | ((x) & UINT32_MAX)) #ifndef _LP64 #if BYTE_ORDER == _BIG_ENDIAN #define DTRACE_PTR(type, name) uint32_t name##pad; type *name #else #define DTRACE_PTR(type, name) type *name; uint32_t name##pad #endif #else #define DTRACE_PTR(type, name) type *name #endif /* * DTrace Object Format (DOF) * * DTrace programs can be persistently encoded in the DOF format so that they * may be embedded in other programs (for example, in an ELF file) or in the * dtrace driver configuration file for use in anonymous tracing. The DOF * format is versioned and extensible so that it can be revised and so that * internal data structures can be modified or extended compatibly. All DOF * structures use fixed-size types, so the 32-bit and 64-bit representations * are identical and consumers can use either data model transparently. * * The file layout is structured as follows: * * +---------------+-------------------+----- ... ----+---- ... ------+ * | dof_hdr_t | dof_sec_t[ ... ] | loadable | non-loadable | * | (file header) | (section headers) | section data | section data | * +---------------+-------------------+----- ... ----+---- ... ------+ * |<------------ dof_hdr.dofh_loadsz --------------->| | * |<------------ dof_hdr.dofh_filesz ------------------------------->| * * The file header stores meta-data including a magic number, data model for * the instrumentation, data encoding, and properties of the DIF code within. * The header describes its own size and the size of the section headers. By * convention, an array of section headers follows the file header, and then * the data for all loadable sections and unloadable sections. This permits * consumer code to easily download the headers and all loadable data into the * DTrace driver in one contiguous chunk, omitting other extraneous sections. * * The section headers describe the size, offset, alignment, and section type * for each section. Sections are described using a set of #defines that tell * the consumer what kind of data is expected. Sections can contain links to * other sections by storing a dof_secidx_t, an index into the section header * array, inside of the section data structures. The section header includes * an entry size so that sections with data arrays can grow their structures. * * The DOF data itself can contain many snippets of DIF (i.e. >1 DIFOs), which * are represented themselves as a collection of related DOF sections. This * permits us to change the set of sections associated with a DIFO over time, * and also permits us to encode DIFOs that contain different sets of sections. * When a DOF section wants to refer to a DIFO, it stores the dof_secidx_t of a * section of type DOF_SECT_DIFOHDR. This section's data is then an array of * dof_secidx_t's which in turn denote the sections associated with this DIFO. * * This loose coupling of the file structure (header and sections) to the * structure of the DTrace program itself (ECB descriptions, action * descriptions, and DIFOs) permits activities such as relocation processing * to occur in a single pass without having to understand D program structure. * * Finally, strings are always stored in ELF-style string tables along with a * string table section index and string table offset. Therefore strings in * DOF are always arbitrary-length and not bound to the current implementation. */ #define DOF_ID_SIZE 16 /* total size of dofh_ident[] in bytes */ typedef struct dof_hdr { uint8_t dofh_ident[DOF_ID_SIZE]; /* identification bytes (see below) */ uint32_t dofh_flags; /* file attribute flags (if any) */ uint32_t dofh_hdrsize; /* size of file header in bytes */ uint32_t dofh_secsize; /* size of section header in bytes */ uint32_t dofh_secnum; /* number of section headers */ uint64_t dofh_secoff; /* file offset of section headers */ uint64_t dofh_loadsz; /* file size of loadable portion */ uint64_t dofh_filesz; /* file size of entire DOF file */ uint64_t dofh_pad; /* reserved for future use */ } dof_hdr_t; #define DOF_ID_MAG0 0 /* first byte of magic number */ #define DOF_ID_MAG1 1 /* second byte of magic number */ #define DOF_ID_MAG2 2 /* third byte of magic number */ #define DOF_ID_MAG3 3 /* fourth byte of magic number */ #define DOF_ID_MODEL 4 /* DOF data model (see below) */ #define DOF_ID_ENCODING 5 /* DOF data encoding (see below) */ #define DOF_ID_VERSION 6 /* DOF file format major version (see below) */ #define DOF_ID_DIFVERS 7 /* DIF instruction set version */ #define DOF_ID_DIFIREG 8 /* DIF integer registers used by compiler */ #define DOF_ID_DIFTREG 9 /* DIF tuple registers used by compiler */ #define DOF_ID_PAD 10 /* start of padding bytes (all zeroes) */ #define DOF_MAG_MAG0 0x7F /* DOF_ID_MAG[0-3] */ #define DOF_MAG_MAG1 'D' #define DOF_MAG_MAG2 'O' #define DOF_MAG_MAG3 'F' #define DOF_MAG_STRING "\177DOF" #define DOF_MAG_STRLEN 4 #define DOF_MODEL_NONE 0 /* DOF_ID_MODEL */ #define DOF_MODEL_ILP32 1 #define DOF_MODEL_LP64 2 #ifdef _LP64 #define DOF_MODEL_NATIVE DOF_MODEL_LP64 #else #define DOF_MODEL_NATIVE DOF_MODEL_ILP32 #endif #define DOF_ENCODE_NONE 0 /* DOF_ID_ENCODING */ #define DOF_ENCODE_LSB 1 #define DOF_ENCODE_MSB 2 #if BYTE_ORDER == _BIG_ENDIAN #define DOF_ENCODE_NATIVE DOF_ENCODE_MSB #else #define DOF_ENCODE_NATIVE DOF_ENCODE_LSB #endif #define DOF_VERSION_1 1 /* DOF version 1: Solaris 10 FCS */ #define DOF_VERSION_2 2 /* DOF version 2: Solaris Express 6/06 */ #define DOF_VERSION DOF_VERSION_2 /* Latest DOF version */ #define DOF_FL_VALID 0 /* mask of all valid dofh_flags bits */ typedef uint32_t dof_secidx_t; /* section header table index type */ typedef uint32_t dof_stridx_t; /* string table index type */ #define DOF_SECIDX_NONE (-1U) /* null value for section indices */ #define DOF_STRIDX_NONE (-1U) /* null value for string indices */ typedef struct dof_sec { uint32_t dofs_type; /* section type (see below) */ uint32_t dofs_align; /* section data memory alignment */ uint32_t dofs_flags; /* section flags (if any) */ uint32_t dofs_entsize; /* size of section entry (if table) */ uint64_t dofs_offset; /* offset of section data within file */ uint64_t dofs_size; /* size of section data in bytes */ } dof_sec_t; #define DOF_SECT_NONE 0 /* null section */ #define DOF_SECT_COMMENTS 1 /* compiler comments */ #define DOF_SECT_SOURCE 2 /* D program source code */ #define DOF_SECT_ECBDESC 3 /* dof_ecbdesc_t */ #define DOF_SECT_PROBEDESC 4 /* dof_probedesc_t */ #define DOF_SECT_ACTDESC 5 /* dof_actdesc_t array */ #define DOF_SECT_DIFOHDR 6 /* dof_difohdr_t (variable length) */ #define DOF_SECT_DIF 7 /* uint32_t array of byte code */ #define DOF_SECT_STRTAB 8 /* string table */ #define DOF_SECT_VARTAB 9 /* dtrace_difv_t array */ #define DOF_SECT_RELTAB 10 /* dof_relodesc_t array */ #define DOF_SECT_TYPTAB 11 /* dtrace_diftype_t array */ #define DOF_SECT_URELHDR 12 /* dof_relohdr_t (user relocations) */ #define DOF_SECT_KRELHDR 13 /* dof_relohdr_t (kernel relocations) */ #define DOF_SECT_OPTDESC 14 /* dof_optdesc_t array */ #define DOF_SECT_PROVIDER 15 /* dof_provider_t */ #define DOF_SECT_PROBES 16 /* dof_probe_t array */ #define DOF_SECT_PRARGS 17 /* uint8_t array (probe arg mappings) */ #define DOF_SECT_PROFFS 18 /* uint32_t array (probe arg offsets) */ #define DOF_SECT_INTTAB 19 /* uint64_t array */ #define DOF_SECT_UTSNAME 20 /* struct utsname */ #define DOF_SECT_XLTAB 21 /* dof_xlref_t array */ #define DOF_SECT_XLMEMBERS 22 /* dof_xlmember_t array */ #define DOF_SECT_XLIMPORT 23 /* dof_xlator_t */ #define DOF_SECT_XLEXPORT 24 /* dof_xlator_t */ #define DOF_SECT_PREXPORT 25 /* dof_secidx_t array (exported objs) */ #define DOF_SECT_PRENOFFS 26 /* uint32_t array (enabled offsets) */ #define DOF_SECF_LOAD 1 /* section should be loaded */ #define DOF_SEC_ISLOADABLE(x) \ (((x) == DOF_SECT_ECBDESC) || ((x) == DOF_SECT_PROBEDESC) || \ ((x) == DOF_SECT_ACTDESC) || ((x) == DOF_SECT_DIFOHDR) || \ ((x) == DOF_SECT_DIF) || ((x) == DOF_SECT_STRTAB) || \ ((x) == DOF_SECT_VARTAB) || ((x) == DOF_SECT_RELTAB) || \ ((x) == DOF_SECT_TYPTAB) || ((x) == DOF_SECT_URELHDR) || \ ((x) == DOF_SECT_KRELHDR) || ((x) == DOF_SECT_OPTDESC) || \ ((x) == DOF_SECT_PROVIDER) || ((x) == DOF_SECT_PROBES) || \ ((x) == DOF_SECT_PRARGS) || ((x) == DOF_SECT_PROFFS) || \ ((x) == DOF_SECT_INTTAB) || ((x) == DOF_SECT_XLTAB) || \ ((x) == DOF_SECT_XLMEMBERS) || ((x) == DOF_SECT_XLIMPORT) || \ ((x) == DOF_SECT_XLIMPORT) || ((x) == DOF_SECT_XLEXPORT) || \ ((x) == DOF_SECT_PREXPORT) || ((x) == DOF_SECT_PRENOFFS)) typedef struct dof_ecbdesc { dof_secidx_t dofe_probes; /* link to DOF_SECT_PROBEDESC */ dof_secidx_t dofe_pred; /* link to DOF_SECT_DIFOHDR */ dof_secidx_t dofe_actions; /* link to DOF_SECT_ACTDESC */ uint32_t dofe_pad; /* reserved for future use */ uint64_t dofe_uarg; /* user-supplied library argument */ } dof_ecbdesc_t; typedef struct dof_probedesc { dof_secidx_t dofp_strtab; /* link to DOF_SECT_STRTAB section */ dof_stridx_t dofp_provider; /* provider string */ dof_stridx_t dofp_mod; /* module string */ dof_stridx_t dofp_func; /* function string */ dof_stridx_t dofp_name; /* name string */ uint32_t dofp_id; /* probe identifier (or zero) */ } dof_probedesc_t; typedef struct dof_actdesc { dof_secidx_t dofa_difo; /* link to DOF_SECT_DIFOHDR */ dof_secidx_t dofa_strtab; /* link to DOF_SECT_STRTAB section */ uint32_t dofa_kind; /* action kind (DTRACEACT_* constant) */ uint32_t dofa_ntuple; /* number of subsequent tuple actions */ uint64_t dofa_arg; /* kind-specific argument */ uint64_t dofa_uarg; /* user-supplied argument */ } dof_actdesc_t; typedef struct dof_difohdr { dtrace_diftype_t dofd_rtype; /* return type for this fragment */ dof_secidx_t dofd_links[1]; /* variable length array of indices */ } dof_difohdr_t; typedef struct dof_relohdr { dof_secidx_t dofr_strtab; /* link to DOF_SECT_STRTAB for names */ dof_secidx_t dofr_relsec; /* link to DOF_SECT_RELTAB for relos */ dof_secidx_t dofr_tgtsec; /* link to section we are relocating */ } dof_relohdr_t; typedef struct dof_relodesc { dof_stridx_t dofr_name; /* string name of relocation symbol */ uint32_t dofr_type; /* relo type (DOF_RELO_* constant) */ uint64_t dofr_offset; /* byte offset for relocation */ uint64_t dofr_data; /* additional type-specific data */ } dof_relodesc_t; #define DOF_RELO_NONE 0 /* empty relocation entry */ #define DOF_RELO_SETX 1 /* relocate setx value */ typedef struct dof_optdesc { uint32_t dofo_option; /* option identifier */ dof_secidx_t dofo_strtab; /* string table, if string option */ uint64_t dofo_value; /* option value or string index */ } dof_optdesc_t; typedef uint32_t dof_attr_t; /* encoded stability attributes */ #define DOF_ATTR(n, d, c) (((n) << 24) | ((d) << 16) | ((c) << 8)) #define DOF_ATTR_NAME(a) (((a) >> 24) & 0xff) #define DOF_ATTR_DATA(a) (((a) >> 16) & 0xff) #define DOF_ATTR_CLASS(a) (((a) >> 8) & 0xff) typedef struct dof_provider { dof_secidx_t dofpv_strtab; /* link to DOF_SECT_STRTAB section */ dof_secidx_t dofpv_probes; /* link to DOF_SECT_PROBES section */ dof_secidx_t dofpv_prargs; /* link to DOF_SECT_PRARGS section */ dof_secidx_t dofpv_proffs; /* link to DOF_SECT_PROFFS section */ dof_stridx_t dofpv_name; /* provider name string */ dof_attr_t dofpv_provattr; /* provider attributes */ dof_attr_t dofpv_modattr; /* module attributes */ dof_attr_t dofpv_funcattr; /* function attributes */ dof_attr_t dofpv_nameattr; /* name attributes */ dof_attr_t dofpv_argsattr; /* args attributes */ dof_secidx_t dofpv_prenoffs; /* link to DOF_SECT_PRENOFFS section */ } dof_provider_t; typedef struct dof_probe { uint64_t dofpr_addr; /* probe base address or offset */ dof_stridx_t dofpr_func; /* probe function string */ dof_stridx_t dofpr_name; /* probe name string */ dof_stridx_t dofpr_nargv; /* native argument type strings */ dof_stridx_t dofpr_xargv; /* translated argument type strings */ uint32_t dofpr_argidx; /* index of first argument mapping */ uint32_t dofpr_offidx; /* index of first offset entry */ uint8_t dofpr_nargc; /* native argument count */ uint8_t dofpr_xargc; /* translated argument count */ uint16_t dofpr_noffs; /* number of offset entries for probe */ uint32_t dofpr_enoffidx; /* index of first is-enabled offset */ uint16_t dofpr_nenoffs; /* number of is-enabled offsets */ uint16_t dofpr_pad1; /* reserved for future use */ uint32_t dofpr_pad2; /* reserved for future use */ } dof_probe_t; typedef struct dof_xlator { dof_secidx_t dofxl_members; /* link to DOF_SECT_XLMEMBERS section */ dof_secidx_t dofxl_strtab; /* link to DOF_SECT_STRTAB section */ dof_stridx_t dofxl_argv; /* input parameter type strings */ uint32_t dofxl_argc; /* input parameter list length */ dof_stridx_t dofxl_type; /* output type string name */ dof_attr_t dofxl_attr; /* output stability attributes */ } dof_xlator_t; typedef struct dof_xlmember { dof_secidx_t dofxm_difo; /* member link to DOF_SECT_DIFOHDR */ dof_stridx_t dofxm_name; /* member name */ dtrace_diftype_t dofxm_type; /* member type */ } dof_xlmember_t; typedef struct dof_xlref { dof_secidx_t dofxr_xlator; /* link to DOF_SECT_XLATORS section */ uint32_t dofxr_member; /* index of referenced dof_xlmember */ uint32_t dofxr_argn; /* index of argument for DIF_OP_XLARG */ } dof_xlref_t; /* * DTrace Intermediate Format Object (DIFO) * * A DIFO is used to store the compiled DIF for a D expression, its return * type, and its string and variable tables. The string table is a single * buffer of character data into which sets instructions and variable * references can reference strings using a byte offset. The variable table * is an array of dtrace_difv_t structures that describe the name and type of * each variable and the id used in the DIF code. This structure is described * above in the DIF section of this header file. The DIFO is used at both * user-level (in the library) and in the kernel, but the structure is never * passed between the two: the DOF structures form the only interface. As a * result, the definition can change depending on the presence of _KERNEL. */ typedef struct dtrace_difo { dif_instr_t *dtdo_buf; /* instruction buffer */ uint64_t *dtdo_inttab; /* integer table (optional) */ char *dtdo_strtab; /* string table (optional) */ dtrace_difv_t *dtdo_vartab; /* variable table (optional) */ uint_t dtdo_len; /* length of instruction buffer */ uint_t dtdo_intlen; /* length of integer table */ uint_t dtdo_strlen; /* length of string table */ uint_t dtdo_varlen; /* length of variable table */ dtrace_diftype_t dtdo_rtype; /* return type */ uint_t dtdo_refcnt; /* owner reference count */ uint_t dtdo_destructive; /* invokes destructive subroutines */ #ifndef _KERNEL dof_relodesc_t *dtdo_kreltab; /* kernel relocations */ dof_relodesc_t *dtdo_ureltab; /* user relocations */ struct dt_node **dtdo_xlmtab; /* translator references */ uint_t dtdo_krelen; /* length of krelo table */ uint_t dtdo_urelen; /* length of urelo table */ uint_t dtdo_xlmlen; /* length of translator table */ #endif } dtrace_difo_t; /* * DTrace Enabling Description Structures * * When DTrace is tracking the description of a DTrace enabling entity (probe, * predicate, action, ECB, record, etc.), it does so in a description * structure. These structures all end in "desc", and are used at both * user-level and in the kernel -- but (with the exception of * dtrace_probedesc_t) they are never passed between them. Typically, * user-level will use the description structures when assembling an enabling. * It will then distill those description structures into a DOF object (see * above), and send it into the kernel. The kernel will again use the * description structures to create a description of the enabling as it reads * the DOF. When the description is complete, the enabling will be actually * created -- turning it into the structures that represent the enabling * instead of merely describing it. Not surprisingly, the description * structures bear a strong resemblance to the DOF structures that act as their * conduit. */ struct dtrace_predicate; typedef struct dtrace_probedesc { dtrace_id_t dtpd_id; /* probe identifier */ char dtpd_provider[DTRACE_PROVNAMELEN]; /* probe provider name */ char dtpd_mod[DTRACE_MODNAMELEN]; /* probe module name */ char dtpd_func[DTRACE_FUNCNAMELEN]; /* probe function name */ char dtpd_name[DTRACE_NAMELEN]; /* probe name */ } dtrace_probedesc_t; typedef struct dtrace_repldesc { dtrace_probedesc_t dtrpd_match; /* probe descr. to match */ dtrace_probedesc_t dtrpd_create; /* probe descr. to create */ } dtrace_repldesc_t; typedef struct dtrace_preddesc { dtrace_difo_t *dtpdd_difo; /* pointer to DIF object */ struct dtrace_predicate *dtpdd_predicate; /* pointer to predicate */ } dtrace_preddesc_t; typedef struct dtrace_actdesc { dtrace_difo_t *dtad_difo; /* pointer to DIF object */ struct dtrace_actdesc *dtad_next; /* next action */ dtrace_actkind_t dtad_kind; /* kind of action */ uint32_t dtad_ntuple; /* number in tuple */ uint64_t dtad_arg; /* action argument */ uint64_t dtad_uarg; /* user argument */ int dtad_refcnt; /* reference count */ } dtrace_actdesc_t; typedef struct dtrace_ecbdesc { dtrace_actdesc_t *dted_action; /* action description(s) */ dtrace_preddesc_t dted_pred; /* predicate description */ dtrace_probedesc_t dted_probe; /* probe description */ uint64_t dted_uarg; /* library argument */ int dted_refcnt; /* reference count */ } dtrace_ecbdesc_t; /* * DTrace Metadata Description Structures * * DTrace separates the trace data stream from the metadata stream. The only * metadata tokens placed in the data stream are the dtrace_rechdr_t (EPID + * timestamp) or (in the case of aggregations) aggregation identifiers. To * determine the structure of the data, DTrace consumers pass the token to the * kernel, and receive in return a corresponding description of the enabled * probe (via the dtrace_eprobedesc structure) or the aggregation (via the * dtrace_aggdesc structure). Both of these structures are expressed in terms * of record descriptions (via the dtrace_recdesc structure) that describe the * exact structure of the data. Some record descriptions may also contain a * format identifier; this additional bit of metadata can be retrieved from the * kernel, for which a format description is returned via the dtrace_fmtdesc * structure. Note that all four of these structures must be bitness-neutral * to allow for a 32-bit DTrace consumer on a 64-bit kernel. */ typedef struct dtrace_recdesc { dtrace_actkind_t dtrd_action; /* kind of action */ uint32_t dtrd_size; /* size of record */ uint32_t dtrd_offset; /* offset in ECB's data */ uint16_t dtrd_alignment; /* required alignment */ uint16_t dtrd_format; /* format, if any */ uint64_t dtrd_arg; /* action argument */ uint64_t dtrd_uarg; /* user argument */ } dtrace_recdesc_t; typedef struct dtrace_eprobedesc { dtrace_epid_t dtepd_epid; /* enabled probe ID */ dtrace_id_t dtepd_probeid; /* probe ID */ uint64_t dtepd_uarg; /* library argument */ uint32_t dtepd_size; /* total size */ int dtepd_nrecs; /* number of records */ dtrace_recdesc_t dtepd_rec[1]; /* records themselves */ } dtrace_eprobedesc_t; typedef struct dtrace_aggdesc { DTRACE_PTR(char, dtagd_name); /* not filled in by kernel */ dtrace_aggvarid_t dtagd_varid; /* not filled in by kernel */ int dtagd_flags; /* not filled in by kernel */ dtrace_aggid_t dtagd_id; /* aggregation ID */ dtrace_epid_t dtagd_epid; /* enabled probe ID */ uint32_t dtagd_size; /* size in bytes */ int dtagd_nrecs; /* number of records */ uint32_t dtagd_pad; /* explicit padding */ dtrace_recdesc_t dtagd_rec[1]; /* record descriptions */ } dtrace_aggdesc_t; typedef struct dtrace_fmtdesc { DTRACE_PTR(char, dtfd_string); /* format string */ int dtfd_length; /* length of format string */ uint16_t dtfd_format; /* format identifier */ } dtrace_fmtdesc_t; #define DTRACE_SIZEOF_EPROBEDESC(desc) \ (sizeof (dtrace_eprobedesc_t) + ((desc)->dtepd_nrecs ? \ (((desc)->dtepd_nrecs - 1) * sizeof (dtrace_recdesc_t)) : 0)) #define DTRACE_SIZEOF_AGGDESC(desc) \ (sizeof (dtrace_aggdesc_t) + ((desc)->dtagd_nrecs ? \ (((desc)->dtagd_nrecs - 1) * sizeof (dtrace_recdesc_t)) : 0)) /* * DTrace Option Interface * * Run-time DTrace options are set and retrieved via DOF_SECT_OPTDESC sections * in a DOF image. The dof_optdesc structure contains an option identifier and * an option value. The valid option identifiers are found below; the mapping * between option identifiers and option identifying strings is maintained at * user-level. Note that the value of DTRACEOPT_UNSET is such that all of the * following are potentially valid option values: all positive integers, zero * and negative one. Some options (notably "bufpolicy" and "bufresize") take * predefined tokens as their values; these are defined with * DTRACEOPT_{option}_{token}. */ #define DTRACEOPT_BUFSIZE 0 /* buffer size */ #define DTRACEOPT_BUFPOLICY 1 /* buffer policy */ #define DTRACEOPT_DYNVARSIZE 2 /* dynamic variable size */ #define DTRACEOPT_AGGSIZE 3 /* aggregation size */ #define DTRACEOPT_SPECSIZE 4 /* speculation size */ #define DTRACEOPT_NSPEC 5 /* number of speculations */ #define DTRACEOPT_STRSIZE 6 /* string size */ #define DTRACEOPT_CLEANRATE 7 /* dynvar cleaning rate */ #define DTRACEOPT_CPU 8 /* CPU to trace */ #define DTRACEOPT_BUFRESIZE 9 /* buffer resizing policy */ #define DTRACEOPT_GRABANON 10 /* grab anonymous state, if any */ #define DTRACEOPT_FLOWINDENT 11 /* indent function entry/return */ #define DTRACEOPT_QUIET 12 /* only output explicitly traced data */ #define DTRACEOPT_STACKFRAMES 13 /* number of stack frames */ #define DTRACEOPT_USTACKFRAMES 14 /* number of user stack frames */ #define DTRACEOPT_AGGRATE 15 /* aggregation snapshot rate */ #define DTRACEOPT_SWITCHRATE 16 /* buffer switching rate */ #define DTRACEOPT_STATUSRATE 17 /* status rate */ #define DTRACEOPT_DESTRUCTIVE 18 /* destructive actions allowed */ #define DTRACEOPT_STACKINDENT 19 /* output indent for stack traces */ #define DTRACEOPT_RAWBYTES 20 /* always print bytes in raw form */ #define DTRACEOPT_JSTACKFRAMES 21 /* number of jstack() frames */ #define DTRACEOPT_JSTACKSTRSIZE 22 /* size of jstack() string table */ #define DTRACEOPT_AGGSORTKEY 23 /* sort aggregations by key */ #define DTRACEOPT_AGGSORTREV 24 /* reverse-sort aggregations */ #define DTRACEOPT_AGGSORTPOS 25 /* agg. position to sort on */ #define DTRACEOPT_AGGSORTKEYPOS 26 /* agg. key position to sort on */ #define DTRACEOPT_TEMPORAL 27 /* temporally ordered output */ #define DTRACEOPT_AGGHIST 28 /* histogram aggregation output */ #define DTRACEOPT_AGGPACK 29 /* packed aggregation output */ #define DTRACEOPT_AGGZOOM 30 /* zoomed aggregation scaling */ #define DTRACEOPT_ZONE 31 /* zone in which to enable probes */ #define DTRACEOPT_MAX 32 /* number of options */ #define DTRACEOPT_UNSET (dtrace_optval_t)-2 /* unset option */ #define DTRACEOPT_BUFPOLICY_RING 0 /* ring buffer */ #define DTRACEOPT_BUFPOLICY_FILL 1 /* fill buffer, then stop */ #define DTRACEOPT_BUFPOLICY_SWITCH 2 /* switch buffers */ #define DTRACEOPT_BUFRESIZE_AUTO 0 /* automatic resizing */ #define DTRACEOPT_BUFRESIZE_MANUAL 1 /* manual resizing */ /* * DTrace Buffer Interface * * In order to get a snapshot of the principal or aggregation buffer, * user-level passes a buffer description to the kernel with the dtrace_bufdesc * structure. This describes which CPU user-level is interested in, and * where user-level wishes the kernel to snapshot the buffer to (the * dtbd_data field). The kernel uses the same structure to pass back some * information regarding the buffer: the size of data actually copied out, the * number of drops, the number of errors, the offset of the oldest record, * and the time of the snapshot. * * If the buffer policy is a "switch" policy, taking a snapshot of the * principal buffer has the additional effect of switching the active and * inactive buffers. Taking a snapshot of the aggregation buffer _always_ has * the additional effect of switching the active and inactive buffers. */ typedef struct dtrace_bufdesc { uint64_t dtbd_size; /* size of buffer */ uint32_t dtbd_cpu; /* CPU or DTRACE_CPUALL */ uint32_t dtbd_errors; /* number of errors */ uint64_t dtbd_drops; /* number of drops */ DTRACE_PTR(char, dtbd_data); /* data */ uint64_t dtbd_oldest; /* offset of oldest record */ uint64_t dtbd_timestamp; /* hrtime of snapshot */ } dtrace_bufdesc_t; /* * Each record in the buffer (dtbd_data) begins with a header that includes * the epid and a timestamp. The timestamp is split into two 4-byte parts * so that we do not require 8-byte alignment. */ typedef struct dtrace_rechdr { dtrace_epid_t dtrh_epid; /* enabled probe id */ uint32_t dtrh_timestamp_hi; /* high bits of hrtime_t */ uint32_t dtrh_timestamp_lo; /* low bits of hrtime_t */ } dtrace_rechdr_t; #define DTRACE_RECORD_LOAD_TIMESTAMP(dtrh) \ ((dtrh)->dtrh_timestamp_lo + \ ((uint64_t)(dtrh)->dtrh_timestamp_hi << 32)) #define DTRACE_RECORD_STORE_TIMESTAMP(dtrh, hrtime) { \ (dtrh)->dtrh_timestamp_lo = (uint32_t)hrtime; \ (dtrh)->dtrh_timestamp_hi = hrtime >> 32; \ } /* * DTrace Status * * The status of DTrace is relayed via the dtrace_status structure. This * structure contains members to count drops other than the capacity drops * available via the buffer interface (see above). This consists of dynamic * drops (including capacity dynamic drops, rinsing drops and dirty drops), and * speculative drops (including capacity speculative drops, drops due to busy * speculative buffers and drops due to unavailable speculative buffers). * Additionally, the status structure contains a field to indicate the number * of "fill"-policy buffers have been filled and a boolean field to indicate * that exit() has been called. If the dtst_exiting field is non-zero, no * further data will be generated until tracing is stopped (at which time any * enablings of the END action will be processed); if user-level sees that * this field is non-zero, tracing should be stopped as soon as possible. */ typedef struct dtrace_status { uint64_t dtst_dyndrops; /* dynamic drops */ uint64_t dtst_dyndrops_rinsing; /* dyn drops due to rinsing */ uint64_t dtst_dyndrops_dirty; /* dyn drops due to dirty */ uint64_t dtst_specdrops; /* speculative drops */ uint64_t dtst_specdrops_busy; /* spec drops due to busy */ uint64_t dtst_specdrops_unavail; /* spec drops due to unavail */ uint64_t dtst_errors; /* total errors */ uint64_t dtst_filled; /* number of filled bufs */ uint64_t dtst_stkstroverflows; /* stack string tab overflows */ uint64_t dtst_dblerrors; /* errors in ERROR probes */ char dtst_killed; /* non-zero if killed */ char dtst_exiting; /* non-zero if exit() called */ char dtst_pad[6]; /* pad out to 64-bit align */ } dtrace_status_t; /* * DTrace Configuration * * User-level may need to understand some elements of the kernel DTrace * configuration in order to generate correct DIF. This information is * conveyed via the dtrace_conf structure. */ typedef struct dtrace_conf { uint_t dtc_difversion; /* supported DIF version */ uint_t dtc_difintregs; /* # of DIF integer registers */ uint_t dtc_diftupregs; /* # of DIF tuple registers */ uint_t dtc_ctfmodel; /* CTF data model */ uint_t dtc_pad[8]; /* reserved for future use */ } dtrace_conf_t; /* * DTrace Faults * * The constants below DTRACEFLT_LIBRARY indicate probe processing faults; * constants at or above DTRACEFLT_LIBRARY indicate faults in probe * postprocessing at user-level. Probe processing faults induce an ERROR * probe and are replicated in unistd.d to allow users' ERROR probes to decode * the error condition using thse symbolic labels. */ #define DTRACEFLT_UNKNOWN 0 /* Unknown fault */ #define DTRACEFLT_BADADDR 1 /* Bad address */ #define DTRACEFLT_BADALIGN 2 /* Bad alignment */ #define DTRACEFLT_ILLOP 3 /* Illegal operation */ #define DTRACEFLT_DIVZERO 4 /* Divide-by-zero */ #define DTRACEFLT_NOSCRATCH 5 /* Out of scratch space */ #define DTRACEFLT_KPRIV 6 /* Illegal kernel access */ #define DTRACEFLT_UPRIV 7 /* Illegal user access */ #define DTRACEFLT_TUPOFLOW 8 /* Tuple stack overflow */ #define DTRACEFLT_BADSTACK 9 /* Bad stack */ #define DTRACEFLT_LIBRARY 1000 /* Library-level fault */ /* * DTrace Argument Types * * Because it would waste both space and time, argument types do not reside * with the probe. In order to determine argument types for args[X] * variables, the D compiler queries for argument types on a probe-by-probe * basis. (This optimizes for the common case that arguments are either not * used or used in an untyped fashion.) Typed arguments are specified with a * string of the type name in the dtragd_native member of the argument * description structure. Typed arguments may be further translated to types * of greater stability; the provider indicates such a translated argument by * filling in the dtargd_xlate member with the string of the translated type. * Finally, the provider may indicate which argument value a given argument * maps to by setting the dtargd_mapping member -- allowing a single argument * to map to multiple args[X] variables. */ typedef struct dtrace_argdesc { dtrace_id_t dtargd_id; /* probe identifier */ int dtargd_ndx; /* arg number (-1 iff none) */ int dtargd_mapping; /* value mapping */ char dtargd_native[DTRACE_ARGTYPELEN]; /* native type name */ char dtargd_xlate[DTRACE_ARGTYPELEN]; /* translated type name */ } dtrace_argdesc_t; /* * DTrace Stability Attributes * * Each DTrace provider advertises the name and data stability of each of its * probe description components, as well as its architectural dependencies. * The D compiler can query the provider attributes (dtrace_pattr_t below) in * order to compute the properties of an input program and report them. */ typedef uint8_t dtrace_stability_t; /* stability code (see attributes(5)) */ typedef uint8_t dtrace_class_t; /* architectural dependency class */ #define DTRACE_STABILITY_INTERNAL 0 /* private to DTrace itself */ #define DTRACE_STABILITY_PRIVATE 1 /* private to Sun (see docs) */ #define DTRACE_STABILITY_OBSOLETE 2 /* scheduled for removal */ #define DTRACE_STABILITY_EXTERNAL 3 /* not controlled by Sun */ #define DTRACE_STABILITY_UNSTABLE 4 /* new or rapidly changing */ #define DTRACE_STABILITY_EVOLVING 5 /* less rapidly changing */ #define DTRACE_STABILITY_STABLE 6 /* mature interface from Sun */ #define DTRACE_STABILITY_STANDARD 7 /* industry standard */ #define DTRACE_STABILITY_MAX 7 /* maximum valid stability */ #define DTRACE_CLASS_UNKNOWN 0 /* unknown architectural dependency */ #define DTRACE_CLASS_CPU 1 /* CPU-module-specific */ #define DTRACE_CLASS_PLATFORM 2 /* platform-specific (uname -i) */ #define DTRACE_CLASS_GROUP 3 /* hardware-group-specific (uname -m) */ #define DTRACE_CLASS_ISA 4 /* ISA-specific (uname -p) */ #define DTRACE_CLASS_COMMON 5 /* common to all systems */ #define DTRACE_CLASS_MAX 5 /* maximum valid class */ #define DTRACE_PRIV_NONE 0x0000 #define DTRACE_PRIV_KERNEL 0x0001 #define DTRACE_PRIV_USER 0x0002 #define DTRACE_PRIV_PROC 0x0004 #define DTRACE_PRIV_OWNER 0x0008 #define DTRACE_PRIV_ZONEOWNER 0x0010 #define DTRACE_PRIV_ALL \ (DTRACE_PRIV_KERNEL | DTRACE_PRIV_USER | \ DTRACE_PRIV_PROC | DTRACE_PRIV_OWNER | DTRACE_PRIV_ZONEOWNER) typedef struct dtrace_ppriv { uint32_t dtpp_flags; /* privilege flags */ uid_t dtpp_uid; /* user ID */ zoneid_t dtpp_zoneid; /* zone ID */ } dtrace_ppriv_t; typedef struct dtrace_attribute { dtrace_stability_t dtat_name; /* entity name stability */ dtrace_stability_t dtat_data; /* entity data stability */ dtrace_class_t dtat_class; /* entity data dependency */ } dtrace_attribute_t; typedef struct dtrace_pattr { dtrace_attribute_t dtpa_provider; /* provider attributes */ dtrace_attribute_t dtpa_mod; /* module attributes */ dtrace_attribute_t dtpa_func; /* function attributes */ dtrace_attribute_t dtpa_name; /* name attributes */ dtrace_attribute_t dtpa_args; /* args[] attributes */ } dtrace_pattr_t; typedef struct dtrace_providerdesc { char dtvd_name[DTRACE_PROVNAMELEN]; /* provider name */ dtrace_pattr_t dtvd_attr; /* stability attributes */ dtrace_ppriv_t dtvd_priv; /* privileges required */ } dtrace_providerdesc_t; /* * DTrace Pseudodevice Interface * * DTrace is controlled through ioctl(2)'s to the in-kernel dtrace:dtrace * pseudodevice driver. These ioctls comprise the user-kernel interface to * DTrace. */ #ifdef illumos #define DTRACEIOC (('d' << 24) | ('t' << 16) | ('r' << 8)) #define DTRACEIOC_PROVIDER (DTRACEIOC | 1) /* provider query */ #define DTRACEIOC_PROBES (DTRACEIOC | 2) /* probe query */ #define DTRACEIOC_BUFSNAP (DTRACEIOC | 4) /* snapshot buffer */ #define DTRACEIOC_PROBEMATCH (DTRACEIOC | 5) /* match probes */ #define DTRACEIOC_ENABLE (DTRACEIOC | 6) /* enable probes */ #define DTRACEIOC_AGGSNAP (DTRACEIOC | 7) /* snapshot agg. */ #define DTRACEIOC_EPROBE (DTRACEIOC | 8) /* get eprobe desc. */ #define DTRACEIOC_PROBEARG (DTRACEIOC | 9) /* get probe arg */ #define DTRACEIOC_CONF (DTRACEIOC | 10) /* get config. */ #define DTRACEIOC_STATUS (DTRACEIOC | 11) /* get status */ #define DTRACEIOC_GO (DTRACEIOC | 12) /* start tracing */ #define DTRACEIOC_STOP (DTRACEIOC | 13) /* stop tracing */ #define DTRACEIOC_AGGDESC (DTRACEIOC | 15) /* get agg. desc. */ #define DTRACEIOC_FORMAT (DTRACEIOC | 16) /* get format str */ #define DTRACEIOC_DOFGET (DTRACEIOC | 17) /* get DOF */ #define DTRACEIOC_REPLICATE (DTRACEIOC | 18) /* replicate enab */ #else #define DTRACEIOC_PROVIDER _IOWR('x',1,dtrace_providerdesc_t) /* provider query */ #define DTRACEIOC_PROBES _IOWR('x',2,dtrace_probedesc_t) /* probe query */ #define DTRACEIOC_BUFSNAP _IOW('x',4,dtrace_bufdesc_t *) /* snapshot buffer */ #define DTRACEIOC_PROBEMATCH _IOWR('x',5,dtrace_probedesc_t) /* match probes */ typedef struct { void *dof; /* DOF userland address written to driver. */ int n_matched; /* # matches returned by driver. */ } dtrace_enable_io_t; #define DTRACEIOC_ENABLE _IOWR('x',6,dtrace_enable_io_t) /* enable probes */ #define DTRACEIOC_AGGSNAP _IOW('x',7,dtrace_bufdesc_t *) /* snapshot agg. */ #define DTRACEIOC_EPROBE _IOW('x',8,dtrace_eprobedesc_t) /* get eprobe desc. */ #define DTRACEIOC_PROBEARG _IOWR('x',9,dtrace_argdesc_t) /* get probe arg */ #define DTRACEIOC_CONF _IOR('x',10,dtrace_conf_t) /* get config. */ #define DTRACEIOC_STATUS _IOR('x',11,dtrace_status_t) /* get status */ #define DTRACEIOC_GO _IOR('x',12,processorid_t) /* start tracing */ #define DTRACEIOC_STOP _IOWR('x',13,processorid_t) /* stop tracing */ #define DTRACEIOC_AGGDESC _IOW('x',15,dtrace_aggdesc_t *) /* get agg. desc. */ #define DTRACEIOC_FORMAT _IOWR('x',16,dtrace_fmtdesc_t) /* get format str */ #define DTRACEIOC_DOFGET _IOW('x',17,dof_hdr_t *) /* get DOF */ #define DTRACEIOC_REPLICATE _IOW('x',18,dtrace_repldesc_t) /* replicate enab */ #endif /* * DTrace Helpers * * In general, DTrace establishes probes in processes and takes actions on * processes without knowing their specific user-level structures. Instead of * existing in the framework, process-specific knowledge is contained by the * enabling D program -- which can apply process-specific knowledge by making * appropriate use of DTrace primitives like copyin() and copyinstr() to * operate on user-level data. However, there may exist some specific probes * of particular semantic relevance that the application developer may wish to * explicitly export. For example, an application may wish to export a probe * at the point that it begins and ends certain well-defined transactions. In * addition to providing probes, programs may wish to offer assistance for * certain actions. For example, in highly dynamic environments (e.g., Java), * it may be difficult to obtain a stack trace in terms of meaningful symbol * names (the translation from instruction addresses to corresponding symbol * names may only be possible in situ); these environments may wish to define * a series of actions to be applied in situ to obtain a meaningful stack * trace. * * These two mechanisms -- user-level statically defined tracing and assisting * DTrace actions -- are provided via DTrace _helpers_. Helpers are specified * via DOF, but unlike enabling DOF, helper DOF may contain definitions of * providers, probes and their arguments. If a helper wishes to provide * action assistance, probe descriptions and corresponding DIF actions may be * specified in the helper DOF. For such helper actions, however, the probe * description describes the specific helper: all DTrace helpers have the * provider name "dtrace" and the module name "helper", and the name of the * helper is contained in the function name (for example, the ustack() helper * is named "ustack"). Any helper-specific name may be contained in the name * (for example, if a helper were to have a constructor, it might be named * "dtrace:helper::init"). Helper actions are only called when the * action that they are helping is taken. Helper actions may only return DIF * expressions, and may only call the following subroutines: * * alloca() <= Allocates memory out of the consumer's scratch space * bcopy() <= Copies memory to scratch space * copyin() <= Copies memory from user-level into consumer's scratch * copyinto() <= Copies memory into a specific location in scratch * copyinstr() <= Copies a string into a specific location in scratch * * Helper actions may only access the following built-in variables: * * curthread <= Current kthread_t pointer * tid <= Current thread identifier * pid <= Current process identifier * ppid <= Parent process identifier * uid <= Current user ID * gid <= Current group ID * execname <= Current executable name * zonename <= Current zone name * * Helper actions may not manipulate or allocate dynamic variables, but they * may have clause-local and statically-allocated global variables. The * helper action variable state is specific to the helper action -- variables * used by the helper action may not be accessed outside of the helper * action, and the helper action may not access variables that like outside * of it. Helper actions may not load from kernel memory at-large; they are * restricting to loading current user state (via copyin() and variants) and * scratch space. As with probe enablings, helper actions are executed in * program order. The result of the helper action is the result of the last * executing helper expression. * * Helpers -- composed of either providers/probes or probes/actions (or both) * -- are added by opening the "helper" minor node, and issuing an ioctl(2) * (DTRACEHIOC_ADDDOF) that specifies the dof_helper_t structure. This * encapsulates the name and base address of the user-level library or * executable publishing the helpers and probes as well as the DOF that * contains the definitions of those helpers and probes. * * The DTRACEHIOC_ADD and DTRACEHIOC_REMOVE are left in place for legacy * helpers and should no longer be used. No other ioctls are valid on the * helper minor node. */ #ifdef illumos #define DTRACEHIOC (('d' << 24) | ('t' << 16) | ('h' << 8)) #define DTRACEHIOC_ADD (DTRACEHIOC | 1) /* add helper */ #define DTRACEHIOC_REMOVE (DTRACEHIOC | 2) /* remove helper */ #define DTRACEHIOC_ADDDOF (DTRACEHIOC | 3) /* add helper DOF */ #else #define DTRACEHIOC_ADD _IOWR('z', 1, dof_hdr_t)/* add helper */ #define DTRACEHIOC_REMOVE _IOW('z', 2, int) /* remove helper */ #define DTRACEHIOC_ADDDOF _IOWR('z', 3, dof_helper_t)/* add helper DOF */ #endif typedef struct dof_helper { char dofhp_mod[DTRACE_MODNAMELEN]; /* executable or library name */ uint64_t dofhp_addr; /* base address of object */ uint64_t dofhp_dof; /* address of helper DOF */ #ifdef __FreeBSD__ pid_t dofhp_pid; /* target process ID */ int dofhp_gen; #endif } dof_helper_t; #define DTRACEMNR_DTRACE "dtrace" /* node for DTrace ops */ #define DTRACEMNR_HELPER "helper" /* node for helpers */ #define DTRACEMNRN_DTRACE 0 /* minor for DTrace ops */ #define DTRACEMNRN_HELPER 1 /* minor for helpers */ #define DTRACEMNRN_CLONE 2 /* first clone minor */ #ifdef _KERNEL /* * DTrace Provider API * * The following functions are implemented by the DTrace framework and are * used to implement separate in-kernel DTrace providers. Common functions * are provided in uts/common/os/dtrace.c. ISA-dependent subroutines are * defined in uts//dtrace/dtrace_asm.s or uts//dtrace/dtrace_isa.c. * * The provider API has two halves: the API that the providers consume from * DTrace, and the API that providers make available to DTrace. * * 1 Framework-to-Provider API * * 1.1 Overview * * The Framework-to-Provider API is represented by the dtrace_pops structure * that the provider passes to the framework when registering itself. This * structure consists of the following members: * * dtps_provide() <-- Provide all probes, all modules * dtps_provide_module() <-- Provide all probes in specified module * dtps_enable() <-- Enable specified probe * dtps_disable() <-- Disable specified probe * dtps_suspend() <-- Suspend specified probe * dtps_resume() <-- Resume specified probe * dtps_getargdesc() <-- Get the argument description for args[X] * dtps_getargval() <-- Get the value for an argX or args[X] variable * dtps_usermode() <-- Find out if the probe was fired in user mode * dtps_destroy() <-- Destroy all state associated with this probe * * 1.2 void dtps_provide(void *arg, const dtrace_probedesc_t *spec) * * 1.2.1 Overview * * Called to indicate that the provider should provide all probes. If the * specified description is non-NULL, dtps_provide() is being called because * no probe matched a specified probe -- if the provider has the ability to * create custom probes, it may wish to create a probe that matches the * specified description. * * 1.2.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_register(). The * second argument is a pointer to a probe description that the provider may * wish to consider when creating custom probes. The provider is expected to * call back into the DTrace framework via dtrace_probe_create() to create * any necessary probes. dtps_provide() may be called even if the provider * has made available all probes; the provider should check the return value * of dtrace_probe_create() to handle this case. Note that the provider need * not implement both dtps_provide() and dtps_provide_module(); see * "Arguments and Notes" for dtrace_register(), below. * * 1.2.3 Return value * * None. * * 1.2.4 Caller's context * * dtps_provide() is typically called from open() or ioctl() context, but may * be called from other contexts as well. The DTrace framework is locked in * such a way that providers may not register or unregister. This means that * the provider may not call any DTrace API that affects its registration with * the framework, including dtrace_register(), dtrace_unregister(), * dtrace_invalidate(), and dtrace_condense(). However, the context is such * that the provider may (and indeed, is expected to) call probe-related * DTrace routines, including dtrace_probe_create(), dtrace_probe_lookup(), * and dtrace_probe_arg(). * * 1.3 void dtps_provide_module(void *arg, modctl_t *mp) * * 1.3.1 Overview * * Called to indicate that the provider should provide all probes in the * specified module. * * 1.3.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_register(). The * second argument is a pointer to a modctl structure that indicates the * module for which probes should be created. * * 1.3.3 Return value * * None. * * 1.3.4 Caller's context * * dtps_provide_module() may be called from open() or ioctl() context, but * may also be called from a module loading context. mod_lock is held, and * the DTrace framework is locked in such a way that providers may not * register or unregister. This means that the provider may not call any * DTrace API that affects its registration with the framework, including * dtrace_register(), dtrace_unregister(), dtrace_invalidate(), and * dtrace_condense(). However, the context is such that the provider may (and * indeed, is expected to) call probe-related DTrace routines, including * dtrace_probe_create(), dtrace_probe_lookup(), and dtrace_probe_arg(). Note * that the provider need not implement both dtps_provide() and * dtps_provide_module(); see "Arguments and Notes" for dtrace_register(), * below. * * 1.4 void dtps_enable(void *arg, dtrace_id_t id, void *parg) * * 1.4.1 Overview * * Called to enable the specified probe. * * 1.4.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_register(). The * second argument is the identifier of the probe to be enabled. The third * argument is the probe argument as passed to dtrace_probe_create(). * dtps_enable() will be called when a probe transitions from not being * enabled at all to having one or more ECB. The number of ECBs associated * with the probe may change without subsequent calls into the provider. * When the number of ECBs drops to zero, the provider will be explicitly * told to disable the probe via dtps_disable(). dtrace_probe() should never * be called for a probe identifier that hasn't been explicitly enabled via * dtps_enable(). * * 1.4.3 Return value * * None. * * 1.4.4 Caller's context * * The DTrace framework is locked in such a way that it may not be called * back into at all. cpu_lock is held. mod_lock is not held and may not * be acquired. * * 1.5 void dtps_disable(void *arg, dtrace_id_t id, void *parg) * * 1.5.1 Overview * * Called to disable the specified probe. * * 1.5.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_register(). The * second argument is the identifier of the probe to be disabled. The third * argument is the probe argument as passed to dtrace_probe_create(). * dtps_disable() will be called when a probe transitions from being enabled * to having zero ECBs. dtrace_probe() should never be called for a probe * identifier that has been explicitly enabled via dtps_disable(). * * 1.5.3 Return value * * None. * * 1.5.4 Caller's context * * The DTrace framework is locked in such a way that it may not be called * back into at all. cpu_lock is held. mod_lock is not held and may not * be acquired. * * 1.6 void dtps_suspend(void *arg, dtrace_id_t id, void *parg) * * 1.6.1 Overview * * Called to suspend the specified enabled probe. This entry point is for * providers that may need to suspend some or all of their probes when CPUs * are being powered on or when the boot monitor is being entered for a * prolonged period of time. * * 1.6.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_register(). The * second argument is the identifier of the probe to be suspended. The * third argument is the probe argument as passed to dtrace_probe_create(). * dtps_suspend will only be called on an enabled probe. Providers that * provide a dtps_suspend entry point will want to take roughly the action * that it takes for dtps_disable. * * 1.6.3 Return value * * None. * * 1.6.4 Caller's context * * Interrupts are disabled. The DTrace framework is in a state such that the * specified probe cannot be disabled or destroyed for the duration of * dtps_suspend(). As interrupts are disabled, the provider is afforded * little latitude; the provider is expected to do no more than a store to * memory. * * 1.7 void dtps_resume(void *arg, dtrace_id_t id, void *parg) * * 1.7.1 Overview * * Called to resume the specified enabled probe. This entry point is for * providers that may need to resume some or all of their probes after the * completion of an event that induced a call to dtps_suspend(). * * 1.7.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_register(). The * second argument is the identifier of the probe to be resumed. The * third argument is the probe argument as passed to dtrace_probe_create(). * dtps_resume will only be called on an enabled probe. Providers that * provide a dtps_resume entry point will want to take roughly the action * that it takes for dtps_enable. * * 1.7.3 Return value * * None. * * 1.7.4 Caller's context * * Interrupts are disabled. The DTrace framework is in a state such that the * specified probe cannot be disabled or destroyed for the duration of * dtps_resume(). As interrupts are disabled, the provider is afforded * little latitude; the provider is expected to do no more than a store to * memory. * * 1.8 void dtps_getargdesc(void *arg, dtrace_id_t id, void *parg, * dtrace_argdesc_t *desc) * * 1.8.1 Overview * * Called to retrieve the argument description for an args[X] variable. * * 1.8.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_register(). The * second argument is the identifier of the current probe. The third * argument is the probe argument as passed to dtrace_probe_create(). The * fourth argument is a pointer to the argument description. This * description is both an input and output parameter: it contains the * index of the desired argument in the dtargd_ndx field, and expects * the other fields to be filled in upon return. If there is no argument * corresponding to the specified index, the dtargd_ndx field should be set * to DTRACE_ARGNONE. * * 1.8.3 Return value * * None. The dtargd_ndx, dtargd_native, dtargd_xlate and dtargd_mapping * members of the dtrace_argdesc_t structure are all output values. * * 1.8.4 Caller's context * * dtps_getargdesc() is called from ioctl() context. mod_lock is held, and * the DTrace framework is locked in such a way that providers may not * register or unregister. This means that the provider may not call any * DTrace API that affects its registration with the framework, including * dtrace_register(), dtrace_unregister(), dtrace_invalidate(), and * dtrace_condense(). * * 1.9 uint64_t dtps_getargval(void *arg, dtrace_id_t id, void *parg, * int argno, int aframes) * * 1.9.1 Overview * * Called to retrieve a value for an argX or args[X] variable. * * 1.9.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_register(). The * second argument is the identifier of the current probe. The third * argument is the probe argument as passed to dtrace_probe_create(). The * fourth argument is the number of the argument (the X in the example in * 1.9.1). The fifth argument is the number of stack frames that were used * to get from the actual place in the code that fired the probe to * dtrace_probe() itself, the so-called artificial frames. This argument may * be used to descend an appropriate number of frames to find the correct * values. If this entry point is left NULL, the dtrace_getarg() built-in * function is used. * * 1.9.3 Return value * * The value of the argument. * * 1.9.4 Caller's context * * This is called from within dtrace_probe() meaning that interrupts * are disabled. No locks should be taken within this entry point. * * 1.10 int dtps_usermode(void *arg, dtrace_id_t id, void *parg) * * 1.10.1 Overview * * Called to determine if the probe was fired in a user context. * * 1.10.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_register(). The * second argument is the identifier of the current probe. The third * argument is the probe argument as passed to dtrace_probe_create(). This * entry point must not be left NULL for providers whose probes allow for * mixed mode tracing, that is to say those probes that can fire during * kernel- _or_ user-mode execution * * 1.10.3 Return value * * A bitwise OR that encapsulates both the mode (either DTRACE_MODE_KERNEL * or DTRACE_MODE_USER) and the policy when the privilege of the enabling * is insufficient for that mode (a combination of DTRACE_MODE_NOPRIV_DROP, * DTRACE_MODE_NOPRIV_RESTRICT, and DTRACE_MODE_LIMITEDPRIV_RESTRICT). If * DTRACE_MODE_NOPRIV_DROP bit is set, insufficient privilege will result * in the probe firing being silently ignored for the enabling; if the * DTRACE_NODE_NOPRIV_RESTRICT bit is set, insufficient privilege will not * prevent probe processing for the enabling, but restrictions will be in * place that induce a UPRIV fault upon attempt to examine probe arguments * or current process state. If the DTRACE_MODE_LIMITEDPRIV_RESTRICT bit * is set, similar restrictions will be placed upon operation if the * privilege is sufficient to process the enabling, but does not otherwise * entitle the enabling to all zones. The DTRACE_MODE_NOPRIV_DROP and * DTRACE_MODE_NOPRIV_RESTRICT are mutually exclusive (and one of these * two policies must be specified), but either may be combined (or not) * with DTRACE_MODE_LIMITEDPRIV_RESTRICT. * * 1.10.4 Caller's context * * This is called from within dtrace_probe() meaning that interrupts * are disabled. No locks should be taken within this entry point. * * 1.11 void dtps_destroy(void *arg, dtrace_id_t id, void *parg) * * 1.11.1 Overview * * Called to destroy the specified probe. * * 1.11.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_register(). The * second argument is the identifier of the probe to be destroyed. The third * argument is the probe argument as passed to dtrace_probe_create(). The * provider should free all state associated with the probe. The framework * guarantees that dtps_destroy() is only called for probes that have either * been disabled via dtps_disable() or were never enabled via dtps_enable(). * Once dtps_disable() has been called for a probe, no further call will be * made specifying the probe. * * 1.11.3 Return value * * None. * * 1.11.4 Caller's context * * The DTrace framework is locked in such a way that it may not be called * back into at all. mod_lock is held. cpu_lock is not held, and may not be * acquired. * * * 2 Provider-to-Framework API * * 2.1 Overview * * The Provider-to-Framework API provides the mechanism for the provider to * register itself with the DTrace framework, to create probes, to lookup * probes and (most importantly) to fire probes. The Provider-to-Framework * consists of: * * dtrace_register() <-- Register a provider with the DTrace framework * dtrace_unregister() <-- Remove a provider's DTrace registration * dtrace_invalidate() <-- Invalidate the specified provider * dtrace_condense() <-- Remove a provider's unenabled probes * dtrace_attached() <-- Indicates whether or not DTrace has attached * dtrace_probe_create() <-- Create a DTrace probe * dtrace_probe_lookup() <-- Lookup a DTrace probe based on its name * dtrace_probe_arg() <-- Return the probe argument for a specific probe * dtrace_probe() <-- Fire the specified probe * * 2.2 int dtrace_register(const char *name, const dtrace_pattr_t *pap, * uint32_t priv, cred_t *cr, const dtrace_pops_t *pops, void *arg, * dtrace_provider_id_t *idp) * * 2.2.1 Overview * * dtrace_register() registers the calling provider with the DTrace * framework. It should generally be called by DTrace providers in their * attach(9E) entry point. * * 2.2.2 Arguments and Notes * * The first argument is the name of the provider. The second argument is a * pointer to the stability attributes for the provider. The third argument * is the privilege flags for the provider, and must be some combination of: * * DTRACE_PRIV_NONE <= All users may enable probes from this provider * * DTRACE_PRIV_PROC <= Any user with privilege of PRIV_DTRACE_PROC may * enable probes from this provider * * DTRACE_PRIV_USER <= Any user with privilege of PRIV_DTRACE_USER may * enable probes from this provider * * DTRACE_PRIV_KERNEL <= Any user with privilege of PRIV_DTRACE_KERNEL * may enable probes from this provider * * DTRACE_PRIV_OWNER <= This flag places an additional constraint on * the privilege requirements above. These probes * require either (a) a user ID matching the user * ID of the cred passed in the fourth argument * or (b) the PRIV_PROC_OWNER privilege. * * DTRACE_PRIV_ZONEOWNER<= This flag places an additional constraint on * the privilege requirements above. These probes * require either (a) a zone ID matching the zone * ID of the cred passed in the fourth argument * or (b) the PRIV_PROC_ZONE privilege. * * Note that these flags designate the _visibility_ of the probes, not * the conditions under which they may or may not fire. * * The fourth argument is the credential that is associated with the * provider. This argument should be NULL if the privilege flags don't * include DTRACE_PRIV_OWNER or DTRACE_PRIV_ZONEOWNER. If non-NULL, the * framework stashes the uid and zoneid represented by this credential * for use at probe-time, in implicit predicates. These limit visibility * of the probes to users and/or zones which have sufficient privilege to * access them. * * The fifth argument is a DTrace provider operations vector, which provides * the implementation for the Framework-to-Provider API. (See Section 1, * above.) This must be non-NULL, and each member must be non-NULL. The * exceptions to this are (1) the dtps_provide() and dtps_provide_module() * members (if the provider so desires, _one_ of these members may be left * NULL -- denoting that the provider only implements the other) and (2) * the dtps_suspend() and dtps_resume() members, which must either both be * NULL or both be non-NULL. * * The sixth argument is a cookie to be specified as the first argument for * each function in the Framework-to-Provider API. This argument may have * any value. * * The final argument is a pointer to dtrace_provider_id_t. If * dtrace_register() successfully completes, the provider identifier will be * stored in the memory pointed to be this argument. This argument must be * non-NULL. * * 2.2.3 Return value * * On success, dtrace_register() returns 0 and stores the new provider's * identifier into the memory pointed to by the idp argument. On failure, * dtrace_register() returns an errno: * * EINVAL The arguments passed to dtrace_register() were somehow invalid. * This may because a parameter that must be non-NULL was NULL, * because the name was invalid (either empty or an illegal * provider name) or because the attributes were invalid. * * No other failure code is returned. * * 2.2.4 Caller's context * * dtrace_register() may induce calls to dtrace_provide(); the provider must * hold no locks across dtrace_register() that may also be acquired by * dtrace_provide(). cpu_lock and mod_lock must not be held. * * 2.3 int dtrace_unregister(dtrace_provider_t id) * * 2.3.1 Overview * * Unregisters the specified provider from the DTrace framework. It should * generally be called by DTrace providers in their detach(9E) entry point. * * 2.3.2 Arguments and Notes * * The only argument is the provider identifier, as returned from a * successful call to dtrace_register(). As a result of calling * dtrace_unregister(), the DTrace framework will call back into the provider * via the dtps_destroy() entry point. Once dtrace_unregister() successfully * completes, however, the DTrace framework will no longer make calls through * the Framework-to-Provider API. * * 2.3.3 Return value * * On success, dtrace_unregister returns 0. On failure, dtrace_unregister() * returns an errno: * * EBUSY There are currently processes that have the DTrace pseudodevice * open, or there exists an anonymous enabling that hasn't yet * been claimed. * * No other failure code is returned. * * 2.3.4 Caller's context * * Because a call to dtrace_unregister() may induce calls through the * Framework-to-Provider API, the caller may not hold any lock across * dtrace_register() that is also acquired in any of the Framework-to- * Provider API functions. Additionally, mod_lock may not be held. * * 2.4 void dtrace_invalidate(dtrace_provider_id_t id) * * 2.4.1 Overview * * Invalidates the specified provider. All subsequent probe lookups for the * specified provider will fail, but its probes will not be removed. * * 2.4.2 Arguments and note * * The only argument is the provider identifier, as returned from a * successful call to dtrace_register(). In general, a provider's probes * always remain valid; dtrace_invalidate() is a mechanism for invalidating * an entire provider, regardless of whether or not probes are enabled or * not. Note that dtrace_invalidate() will _not_ prevent already enabled * probes from firing -- it will merely prevent any new enablings of the * provider's probes. * * 2.5 int dtrace_condense(dtrace_provider_id_t id) * * 2.5.1 Overview * * Removes all the unenabled probes for the given provider. This function is * not unlike dtrace_unregister(), except that it doesn't remove the * provider just as many of its associated probes as it can. * * 2.5.2 Arguments and Notes * * As with dtrace_unregister(), the sole argument is the provider identifier * as returned from a successful call to dtrace_register(). As a result of * calling dtrace_condense(), the DTrace framework will call back into the * given provider's dtps_destroy() entry point for each of the provider's * unenabled probes. * * 2.5.3 Return value * * Currently, dtrace_condense() always returns 0. However, consumers of this * function should check the return value as appropriate; its behavior may * change in the future. * * 2.5.4 Caller's context * * As with dtrace_unregister(), the caller may not hold any lock across * dtrace_condense() that is also acquired in the provider's entry points. * Also, mod_lock may not be held. * * 2.6 int dtrace_attached() * * 2.6.1 Overview * * Indicates whether or not DTrace has attached. * * 2.6.2 Arguments and Notes * * For most providers, DTrace makes initial contact beyond registration. * That is, once a provider has registered with DTrace, it waits to hear * from DTrace to create probes. However, some providers may wish to * proactively create probes without first being told by DTrace to do so. * If providers wish to do this, they must first call dtrace_attached() to * determine if DTrace itself has attached. If dtrace_attached() returns 0, * the provider must not make any other Provider-to-Framework API call. * * 2.6.3 Return value * * dtrace_attached() returns 1 if DTrace has attached, 0 otherwise. * * 2.7 int dtrace_probe_create(dtrace_provider_t id, const char *mod, * const char *func, const char *name, int aframes, void *arg) * * 2.7.1 Overview * * Creates a probe with specified module name, function name, and name. * * 2.7.2 Arguments and Notes * * The first argument is the provider identifier, as returned from a * successful call to dtrace_register(). The second, third, and fourth * arguments are the module name, function name, and probe name, * respectively. Of these, module name and function name may both be NULL * (in which case the probe is considered to be unanchored), or they may both * be non-NULL. The name must be non-NULL, and must point to a non-empty * string. * * The fifth argument is the number of artificial stack frames that will be * found on the stack when dtrace_probe() is called for the new probe. These * artificial frames will be automatically be pruned should the stack() or * stackdepth() functions be called as part of one of the probe's ECBs. If * the parameter doesn't add an artificial frame, this parameter should be * zero. * * The final argument is a probe argument that will be passed back to the * provider when a probe-specific operation is called. (e.g., via * dtps_enable(), dtps_disable(), etc.) * * Note that it is up to the provider to be sure that the probe that it * creates does not already exist -- if the provider is unsure of the probe's * existence, it should assure its absence with dtrace_probe_lookup() before * calling dtrace_probe_create(). * * 2.7.3 Return value * * dtrace_probe_create() always succeeds, and always returns the identifier * of the newly-created probe. * * 2.7.4 Caller's context * * While dtrace_probe_create() is generally expected to be called from * dtps_provide() and/or dtps_provide_module(), it may be called from other * non-DTrace contexts. Neither cpu_lock nor mod_lock may be held. * * 2.8 dtrace_id_t dtrace_probe_lookup(dtrace_provider_t id, const char *mod, * const char *func, const char *name) * * 2.8.1 Overview * * Looks up a probe based on provdider and one or more of module name, * function name and probe name. * * 2.8.2 Arguments and Notes * * The first argument is the provider identifier, as returned from a * successful call to dtrace_register(). The second, third, and fourth * arguments are the module name, function name, and probe name, * respectively. Any of these may be NULL; dtrace_probe_lookup() will return * the identifier of the first probe that is provided by the specified * provider and matches all of the non-NULL matching criteria. * dtrace_probe_lookup() is generally used by a provider to be check the * existence of a probe before creating it with dtrace_probe_create(). * * 2.8.3 Return value * * If the probe exists, returns its identifier. If the probe does not exist, * return DTRACE_IDNONE. * * 2.8.4 Caller's context * * While dtrace_probe_lookup() is generally expected to be called from * dtps_provide() and/or dtps_provide_module(), it may also be called from * other non-DTrace contexts. Neither cpu_lock nor mod_lock may be held. * * 2.9 void *dtrace_probe_arg(dtrace_provider_t id, dtrace_id_t probe) * * 2.9.1 Overview * * Returns the probe argument associated with the specified probe. * * 2.9.2 Arguments and Notes * * The first argument is the provider identifier, as returned from a * successful call to dtrace_register(). The second argument is a probe * identifier, as returned from dtrace_probe_lookup() or * dtrace_probe_create(). This is useful if a probe has multiple * provider-specific components to it: the provider can create the probe * once with provider-specific state, and then add to the state by looking * up the probe based on probe identifier. * * 2.9.3 Return value * * Returns the argument associated with the specified probe. If the * specified probe does not exist, or if the specified probe is not provided * by the specified provider, NULL is returned. * * 2.9.4 Caller's context * * While dtrace_probe_arg() is generally expected to be called from * dtps_provide() and/or dtps_provide_module(), it may also be called from * other non-DTrace contexts. Neither cpu_lock nor mod_lock may be held. * * 2.10 void dtrace_probe(dtrace_id_t probe, uintptr_t arg0, uintptr_t arg1, * uintptr_t arg2, uintptr_t arg3, uintptr_t arg4) * * 2.10.1 Overview * * The epicenter of DTrace: fires the specified probes with the specified * arguments. * * 2.10.2 Arguments and Notes * * The first argument is a probe identifier as returned by * dtrace_probe_create() or dtrace_probe_lookup(). The second through sixth * arguments are the values to which the D variables "arg0" through "arg4" * will be mapped. * * dtrace_probe() should be called whenever the specified probe has fired -- * however the provider defines it. * * 2.10.3 Return value * * None. * * 2.10.4 Caller's context * * dtrace_probe() may be called in virtually any context: kernel, user, * interrupt, high-level interrupt, with arbitrary adaptive locks held, with * dispatcher locks held, with interrupts disabled, etc. The only latitude * that must be afforded to DTrace is the ability to make calls within * itself (and to its in-kernel subroutines) and the ability to access * arbitrary (but mapped) memory. On some platforms, this constrains * context. For example, on UltraSPARC, dtrace_probe() cannot be called * from any context in which TL is greater than zero. dtrace_probe() may * also not be called from any routine which may be called by dtrace_probe() * -- which includes functions in the DTrace framework and some in-kernel * DTrace subroutines. All such functions "dtrace_"; providers that * instrument the kernel arbitrarily should be sure to not instrument these * routines. */ typedef struct dtrace_pops { void (*dtps_provide)(void *arg, dtrace_probedesc_t *spec); void (*dtps_provide_module)(void *arg, modctl_t *mp); void (*dtps_enable)(void *arg, dtrace_id_t id, void *parg); void (*dtps_disable)(void *arg, dtrace_id_t id, void *parg); void (*dtps_suspend)(void *arg, dtrace_id_t id, void *parg); void (*dtps_resume)(void *arg, dtrace_id_t id, void *parg); void (*dtps_getargdesc)(void *arg, dtrace_id_t id, void *parg, dtrace_argdesc_t *desc); uint64_t (*dtps_getargval)(void *arg, dtrace_id_t id, void *parg, int argno, int aframes); int (*dtps_usermode)(void *arg, dtrace_id_t id, void *parg); void (*dtps_destroy)(void *arg, dtrace_id_t id, void *parg); } dtrace_pops_t; #define DTRACE_MODE_KERNEL 0x01 #define DTRACE_MODE_USER 0x02 #define DTRACE_MODE_NOPRIV_DROP 0x10 #define DTRACE_MODE_NOPRIV_RESTRICT 0x20 #define DTRACE_MODE_LIMITEDPRIV_RESTRICT 0x40 typedef uintptr_t dtrace_provider_id_t; extern int dtrace_register(const char *, const dtrace_pattr_t *, uint32_t, cred_t *, const dtrace_pops_t *, void *, dtrace_provider_id_t *); extern int dtrace_unregister(dtrace_provider_id_t); extern int dtrace_condense(dtrace_provider_id_t); extern void dtrace_invalidate(dtrace_provider_id_t); extern dtrace_id_t dtrace_probe_lookup(dtrace_provider_id_t, char *, char *, char *); extern dtrace_id_t dtrace_probe_create(dtrace_provider_id_t, const char *, const char *, const char *, int, void *); extern void *dtrace_probe_arg(dtrace_provider_id_t, dtrace_id_t); extern void dtrace_probe(dtrace_id_t, uintptr_t arg0, uintptr_t arg1, uintptr_t arg2, uintptr_t arg3, uintptr_t arg4); /* * DTrace Meta Provider API * * The following functions are implemented by the DTrace framework and are * used to implement meta providers. Meta providers plug into the DTrace * framework and are used to instantiate new providers on the fly. At * present, there is only one type of meta provider and only one meta * provider may be registered with the DTrace framework at a time. The * sole meta provider type provides user-land static tracing facilities * by taking meta probe descriptions and adding a corresponding provider * into the DTrace framework. * * 1 Framework-to-Provider * * 1.1 Overview * * The Framework-to-Provider API is represented by the dtrace_mops structure * that the meta provider passes to the framework when registering itself as * a meta provider. This structure consists of the following members: * * dtms_create_probe() <-- Add a new probe to a created provider * dtms_provide_pid() <-- Create a new provider for a given process * dtms_remove_pid() <-- Remove a previously created provider * * 1.2 void dtms_create_probe(void *arg, void *parg, * dtrace_helper_probedesc_t *probedesc); * * 1.2.1 Overview * * Called by the DTrace framework to create a new probe in a provider * created by this meta provider. * * 1.2.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_meta_register(). * The second argument is the provider cookie for the associated provider; * this is obtained from the return value of dtms_provide_pid(). The third * argument is the helper probe description. * * 1.2.3 Return value * * None * * 1.2.4 Caller's context * * dtms_create_probe() is called from either ioctl() or module load context * in the context of a newly-created provider (that is, a provider that * is a result of a call to dtms_provide_pid()). The DTrace framework is * locked in such a way that meta providers may not register or unregister, * such that no other thread can call into a meta provider operation and that * atomicity is assured with respect to meta provider operations across * dtms_provide_pid() and subsequent calls to dtms_create_probe(). * The context is thus effectively single-threaded with respect to the meta * provider, and that the meta provider cannot call dtrace_meta_register() * or dtrace_meta_unregister(). However, the context is such that the * provider may (and is expected to) call provider-related DTrace provider * APIs including dtrace_probe_create(). * * 1.3 void *dtms_provide_pid(void *arg, dtrace_meta_provider_t *mprov, * pid_t pid) * * 1.3.1 Overview * * Called by the DTrace framework to instantiate a new provider given the * description of the provider and probes in the mprov argument. The * meta provider should call dtrace_register() to insert the new provider * into the DTrace framework. * * 1.3.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_meta_register(). * The second argument is a pointer to a structure describing the new * helper provider. The third argument is the process identifier for * process associated with this new provider. Note that the name of the * provider as passed to dtrace_register() should be the contatenation of * the dtmpb_provname member of the mprov argument and the processs * identifier as a string. * * 1.3.3 Return value * * The cookie for the provider that the meta provider creates. This is * the same value that it passed to dtrace_register(). * * 1.3.4 Caller's context * * dtms_provide_pid() is called from either ioctl() or module load context. * The DTrace framework is locked in such a way that meta providers may not * register or unregister. This means that the meta provider cannot call * dtrace_meta_register() or dtrace_meta_unregister(). However, the context * is such that the provider may -- and is expected to -- call * provider-related DTrace provider APIs including dtrace_register(). * * 1.4 void dtms_remove_pid(void *arg, dtrace_meta_provider_t *mprov, * pid_t pid) * * 1.4.1 Overview * * Called by the DTrace framework to remove a provider that had previously * been instantiated via the dtms_provide_pid() entry point. The meta * provider need not remove the provider immediately, but this entry * point indicates that the provider should be removed as soon as possible * using the dtrace_unregister() API. * * 1.4.2 Arguments and notes * * The first argument is the cookie as passed to dtrace_meta_register(). * The second argument is a pointer to a structure describing the helper * provider. The third argument is the process identifier for process * associated with this new provider. * * 1.4.3 Return value * * None * * 1.4.4 Caller's context * * dtms_remove_pid() is called from either ioctl() or exit() context. * The DTrace framework is locked in such a way that meta providers may not * register or unregister. This means that the meta provider cannot call * dtrace_meta_register() or dtrace_meta_unregister(). However, the context * is such that the provider may -- and is expected to -- call * provider-related DTrace provider APIs including dtrace_unregister(). */ typedef struct dtrace_helper_probedesc { char *dthpb_mod; /* probe module */ char *dthpb_func; /* probe function */ char *dthpb_name; /* probe name */ uint64_t dthpb_base; /* base address */ uint32_t *dthpb_offs; /* offsets array */ uint32_t *dthpb_enoffs; /* is-enabled offsets array */ uint32_t dthpb_noffs; /* offsets count */ uint32_t dthpb_nenoffs; /* is-enabled offsets count */ uint8_t *dthpb_args; /* argument mapping array */ uint8_t dthpb_xargc; /* translated argument count */ uint8_t dthpb_nargc; /* native argument count */ char *dthpb_xtypes; /* translated types strings */ char *dthpb_ntypes; /* native types strings */ } dtrace_helper_probedesc_t; typedef struct dtrace_helper_provdesc { char *dthpv_provname; /* provider name */ dtrace_pattr_t dthpv_pattr; /* stability attributes */ } dtrace_helper_provdesc_t; typedef struct dtrace_mops { void (*dtms_create_probe)(void *, void *, dtrace_helper_probedesc_t *); void *(*dtms_provide_pid)(void *, dtrace_helper_provdesc_t *, pid_t); void (*dtms_remove_pid)(void *, dtrace_helper_provdesc_t *, pid_t); } dtrace_mops_t; typedef uintptr_t dtrace_meta_provider_id_t; extern int dtrace_meta_register(const char *, const dtrace_mops_t *, void *, dtrace_meta_provider_id_t *); extern int dtrace_meta_unregister(dtrace_meta_provider_id_t); /* * DTrace Kernel Hooks * * The following functions are implemented by the base kernel and form a set of * hooks used by the DTrace framework. DTrace hooks are implemented in either * uts/common/os/dtrace_subr.c, an ISA-specific assembly file, or in a * uts//os/dtrace_subr.c corresponding to each hardware platform. */ typedef enum dtrace_vtime_state { DTRACE_VTIME_INACTIVE = 0, /* No DTrace, no TNF */ DTRACE_VTIME_ACTIVE, /* DTrace virtual time, no TNF */ DTRACE_VTIME_INACTIVE_TNF, /* No DTrace, TNF active */ DTRACE_VTIME_ACTIVE_TNF /* DTrace virtual time _and_ TNF */ } dtrace_vtime_state_t; #ifdef illumos extern dtrace_vtime_state_t dtrace_vtime_active; #endif extern void dtrace_vtime_switch(kthread_t *next); extern void dtrace_vtime_enable_tnf(void); extern void dtrace_vtime_disable_tnf(void); extern void dtrace_vtime_enable(void); extern void dtrace_vtime_disable(void); struct regs; struct reg; #ifdef illumos extern int (*dtrace_pid_probe_ptr)(struct reg *); extern int (*dtrace_return_probe_ptr)(struct reg *); extern void (*dtrace_fasttrap_fork_ptr)(proc_t *, proc_t *); extern void (*dtrace_fasttrap_exec_ptr)(proc_t *); extern void (*dtrace_fasttrap_exit_ptr)(proc_t *); extern void dtrace_fasttrap_fork(proc_t *, proc_t *); #endif typedef uintptr_t dtrace_icookie_t; typedef void (*dtrace_xcall_t)(void *); extern dtrace_icookie_t dtrace_interrupt_disable(void); extern void dtrace_interrupt_enable(dtrace_icookie_t); extern void dtrace_membar_producer(void); extern void dtrace_membar_consumer(void); extern void (*dtrace_cpu_init)(processorid_t); #ifdef illumos extern void (*dtrace_modload)(modctl_t *); extern void (*dtrace_modunload)(modctl_t *); #endif extern void (*dtrace_helpers_cleanup)(void); extern void (*dtrace_helpers_fork)(proc_t *parent, proc_t *child); extern void (*dtrace_cpustart_init)(void); extern void (*dtrace_cpustart_fini)(void); extern void (*dtrace_closef)(void); extern void (*dtrace_debugger_init)(void); extern void (*dtrace_debugger_fini)(void); extern dtrace_cacheid_t dtrace_predcache_id; #ifdef illumos extern hrtime_t dtrace_gethrtime(void); #else void dtrace_debug_printf(const char *, ...) __printflike(1, 2); #endif extern void dtrace_sync(void); extern void dtrace_toxic_ranges(void (*)(uintptr_t, uintptr_t)); extern void dtrace_xcall(processorid_t, dtrace_xcall_t, void *); extern void dtrace_vpanic(const char *, __va_list); extern void dtrace_panic(const char *, ...); extern int dtrace_safe_defer_signal(void); extern void dtrace_safe_synchronous_signal(void); extern int dtrace_mach_aframes(void); #if defined(__i386) || defined(__amd64) extern int dtrace_instr_size(uchar_t *instr); extern int dtrace_instr_size_isa(uchar_t *, model_t, int *); extern void dtrace_invop_callsite(void); #endif extern void dtrace_invop_add(int (*)(uintptr_t, struct trapframe *, uintptr_t)); extern void dtrace_invop_remove(int (*)(uintptr_t, struct trapframe *, uintptr_t)); #ifdef __sparc extern int dtrace_blksuword32(uintptr_t, uint32_t *, int); extern void dtrace_getfsr(uint64_t *); #endif #ifndef illumos extern void dtrace_helpers_duplicate(proc_t *, proc_t *); extern void dtrace_helpers_destroy(proc_t *); #endif #define DTRACE_CPUFLAG_ISSET(flag) \ (cpu_core[curcpu].cpuc_dtrace_flags & (flag)) #define DTRACE_CPUFLAG_SET(flag) \ (cpu_core[curcpu].cpuc_dtrace_flags |= (flag)) #define DTRACE_CPUFLAG_CLEAR(flag) \ (cpu_core[curcpu].cpuc_dtrace_flags &= ~(flag)) #endif /* _KERNEL */ #endif /* _ASM */ #if defined(__i386) || defined(__amd64) #define DTRACE_INVOP_PUSHL_EBP 1 #define DTRACE_INVOP_PUSHQ_RBP DTRACE_INVOP_PUSHL_EBP #define DTRACE_INVOP_POPL_EBP 2 #define DTRACE_INVOP_POPQ_RBP DTRACE_INVOP_POPL_EBP #define DTRACE_INVOP_LEAVE 3 #define DTRACE_INVOP_NOP 4 #define DTRACE_INVOP_RET 5 #elif defined(__powerpc__) #define DTRACE_INVOP_RET 1 #define DTRACE_INVOP_BCTR 2 #define DTRACE_INVOP_BLR 3 #define DTRACE_INVOP_JUMP 4 #define DTRACE_INVOP_MFLR_R0 5 #define DTRACE_INVOP_NOP 6 #elif defined(__arm__) #define DTRACE_INVOP_SHIFT 4 #define DTRACE_INVOP_MASK ((1 << DTRACE_INVOP_SHIFT) - 1) #define DTRACE_INVOP_DATA(x) ((x) >> DTRACE_INVOP_SHIFT) #define DTRACE_INVOP_PUSHM 1 #define DTRACE_INVOP_POPM 2 #define DTRACE_INVOP_B 3 #elif defined(__aarch64__) #define INSN_SIZE 4 #define B_MASK 0xff000000 #define B_DATA_MASK 0x00ffffff #define B_INSTR 0x14000000 #define RET_INSTR 0xd65f03c0 #define LDP_STP_MASK 0xffc00000 #define STP_32 0x29800000 #define STP_64 0xa9800000 #define LDP_32 0x28c00000 #define LDP_64 0xa8c00000 #define LDP_STP_PREIND (1 << 24) #define LDP_STP_DIR (1 << 22) /* Load instruction */ #define ARG1_SHIFT 0 #define ARG1_MASK 0x1f #define ARG2_SHIFT 10 #define ARG2_MASK 0x1f #define OFFSET_SHIFT 15 #define OFFSET_SIZE 7 #define OFFSET_MASK ((1 << OFFSET_SIZE) - 1) #define DTRACE_INVOP_PUSHM 1 #define DTRACE_INVOP_RET 2 #define DTRACE_INVOP_B 3 #elif defined(__mips__) #define INSN_SIZE 4 /* Load/Store double RA to/from SP */ #define LDSD_RA_SP_MASK 0xffff0000 #define LDSD_DATA_MASK 0x0000ffff #define SD_RA_SP 0xffbf0000 #define LD_RA_SP 0xdfbf0000 #define DTRACE_INVOP_SD 1 #define DTRACE_INVOP_LD 2 #elif defined(__riscv__) -#define SD_RA_SP_MASK 0x1fff07f -#define SD_RA_SP 0x0113023 +#define SD_RA_SP_MASK 0x01fff07f +#define SD_RA_SP 0x00113023 #define DTRACE_INVOP_SD 1 #define DTRACE_INVOP_RET 2 #define DTRACE_INVOP_NOP 3 #endif #ifdef __cplusplus } #endif #endif /* _SYS_DTRACE_H */ Index: head/sys/cddl/dev/dtrace/riscv/dtrace_asm.S =================================================================== --- head/sys/cddl/dev/dtrace/riscv/dtrace_asm.S (revision 303659) +++ head/sys/cddl/dev/dtrace/riscv/dtrace_asm.S (revision 303660) @@ -1,177 +1,177 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License, Version 1.0 only * (the "License"). You may not use this file except in compliance * with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END * * Portions Copyright 2016 Ruslan Bukin * * $FreeBSD$ */ /* * Copyright 2004 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #define _ASM #define _LOCORE #include #include #include #include #include "assym.s" /* void dtrace_membar_producer(void) */ ENTRY(dtrace_membar_producer) RET END(dtrace_membar_producer) /* void dtrace_membar_consumer(void) */ ENTRY(dtrace_membar_consumer) RET END(dtrace_membar_consumer) /* dtrace_icookie_t dtrace_interrupt_disable(void) */ ENTRY(dtrace_interrupt_disable) - csrrci a0, sstatus, SSTATUS_IE - andi a0, a0, SSTATUS_IE + csrrci a0, sstatus, (SSTATUS_SIE) + andi a0, a0, (SSTATUS_SIE) RET END(dtrace_interrupt_disable) /* void dtrace_interrupt_enable(dtrace_icookie_t cookie) */ ENTRY(dtrace_interrupt_enable) csrs sstatus, a0 RET END(dtrace_interrupt_enable) /* uint8_t dtrace_fuword8_nocheck(void *addr) */ ENTRY(dtrace_fuword8_nocheck) lb a0, 0(a0) RET END(dtrace_fuword8_nocheck) /* uint16_t dtrace_fuword16_nocheck(void *addr) */ ENTRY(dtrace_fuword16_nocheck) lh a0, 0(a0) RET END(dtrace_fuword16_nocheck) /* uint32_t dtrace_fuword32_nocheck(void *addr) */ ENTRY(dtrace_fuword32_nocheck) lw a0, 0(a0) RET END(dtrace_fuword32_nocheck) /* uint64_t dtrace_fuword64_nocheck(void *addr) */ ENTRY(dtrace_fuword64_nocheck) ld a0, 0(a0) RET END(dtrace_fuword64_nocheck) /* void dtrace_copy(uintptr_t uaddr, uintptr_t kaddr, size_t size) */ ENTRY(dtrace_copy) beqz a2, 2f /* If len == 0 then skip loop */ 1: lb a4, 0(a0) /* Load from uaddr */ addi a0, a0, 1 sb a4, 0(a1) /* Store in kaddr */ addi a1, a1, 1 addi a2, a2, -1 /* len-- */ bnez a2, 1b 2: RET END(dtrace_copy) /* void dtrace_copystr(uintptr_t uaddr, uintptr_t kaddr, size_t size, volatile uint16_t *flags) XXX: Check for flags? */ ENTRY(dtrace_copystr) beqz a2, 2f /* If len == 0 then skip loop */ lb a4, 0(a0) /* Load from uaddr */ addi a0, a0, 1 sb a4, 0(a1) /* Store in kaddr */ addi a1, a1, 1 beqz a4, 2f /* If == 0 then break */ addi a2, a2, -1 /* len-- */ bnez a2, 1b 2: RET END(dtrace_copystr) /* uintptr_t dtrace_caller(int aframes) */ ENTRY(dtrace_caller) li a0, -1 RET END(dtrace_caller) /* uint32_t dtrace_cas32(uint32_t *target, uint32_t cmp, uint32_t new) */ ENTRY(dtrace_cas32) 1: lr.w a3, 0(a0) /* Load target */ bne a3, a1, 2f /* *target != cmp ? return */ sc.w a4, a2, 0(a0) /* Store new to target */ bnez a4, 1b /* Try again if store not succeed */ 2: mv a0, a3 /* Return the value loaded from target */ RET END(dtrace_cas32) /* void * dtrace_casptr(volatile void *target, volatile void *cmp, volatile void *new) */ ENTRY(dtrace_casptr) 1: lr.d a3, 0(a0) /* Load target */ bne a3, a1, 2f /* *target != cmp ? return */ sc.d a4, a2, 0(a0) /* Store new to target */ bnez a4, 1b /* Try again if store not succeed */ 2: mv a0, a3 /* Return the value loaded from target */ RET END(dtrace_casptr) Index: head/sys/cddl/dev/dtrace/riscv/dtrace_subr.c =================================================================== --- head/sys/cddl/dev/dtrace/riscv/dtrace_subr.c (revision 303659) +++ head/sys/cddl/dev/dtrace/riscv/dtrace_subr.c (revision 303660) @@ -1,282 +1,282 @@ /* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License, Version 1.0 only * (the "License"). You may not use this file except in compliance * with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END * * Portions Copyright 2016 Ruslan Bukin * * $FreeBSD$ * */ /* * Copyright 2005 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include extern dtrace_id_t dtrace_probeid_error; extern int (*dtrace_invop_jump_addr)(struct trapframe *); extern void dtrace_getnanotime(struct timespec *tsp); int dtrace_invop(uintptr_t, struct trapframe *, uintptr_t); void dtrace_invop_init(void); void dtrace_invop_uninit(void); typedef struct dtrace_invop_hdlr { int (*dtih_func)(uintptr_t, struct trapframe *, uintptr_t); struct dtrace_invop_hdlr *dtih_next; } dtrace_invop_hdlr_t; dtrace_invop_hdlr_t *dtrace_invop_hdlr; int dtrace_invop(uintptr_t addr, struct trapframe *frame, uintptr_t eax) { dtrace_invop_hdlr_t *hdlr; int rval; for (hdlr = dtrace_invop_hdlr; hdlr != NULL; hdlr = hdlr->dtih_next) if ((rval = hdlr->dtih_func(addr, frame, eax)) != 0) return (rval); return (0); } void dtrace_invop_add(int (*func)(uintptr_t, struct trapframe *, uintptr_t)) { dtrace_invop_hdlr_t *hdlr; hdlr = kmem_alloc(sizeof (dtrace_invop_hdlr_t), KM_SLEEP); hdlr->dtih_func = func; hdlr->dtih_next = dtrace_invop_hdlr; dtrace_invop_hdlr = hdlr; } void dtrace_invop_remove(int (*func)(uintptr_t, struct trapframe *, uintptr_t)) { dtrace_invop_hdlr_t *hdlr, *prev; hdlr = dtrace_invop_hdlr; prev = NULL; for (;;) { if (hdlr == NULL) panic("attempt to remove non-existent invop handler"); if (hdlr->dtih_func == func) break; prev = hdlr; hdlr = hdlr->dtih_next; } if (prev == NULL) { ASSERT(dtrace_invop_hdlr == hdlr); dtrace_invop_hdlr = hdlr->dtih_next; } else { ASSERT(dtrace_invop_hdlr != hdlr); prev->dtih_next = hdlr->dtih_next; } kmem_free(hdlr, 0); } /*ARGSUSED*/ void dtrace_toxic_ranges(void (*func)(uintptr_t base, uintptr_t limit)) { (*func)(0, (uintptr_t)VM_MIN_KERNEL_ADDRESS); } void dtrace_xcall(processorid_t cpu, dtrace_xcall_t func, void *arg) { cpuset_t cpus; if (cpu == DTRACE_CPUALL) cpus = all_cpus; else CPU_SETOF(cpu, &cpus); smp_rendezvous_cpus(cpus, smp_no_rendevous_barrier, func, smp_no_rendevous_barrier, arg); } static void dtrace_sync_func(void) { } void dtrace_sync(void) { dtrace_xcall(DTRACE_CPUALL, (dtrace_xcall_t)dtrace_sync_func, NULL); } /* * DTrace needs a high resolution time function which can * be called from a probe context and guaranteed not to have * instrumented with probes itself. * * Returns nanoseconds since boot. */ uint64_t dtrace_gethrtime() { struct timespec curtime; nanouptime(&curtime); return (curtime.tv_sec * 1000000000UL + curtime.tv_nsec); } uint64_t dtrace_gethrestime(void) { struct timespec current_time; dtrace_getnanotime(¤t_time); return (current_time.tv_sec * 1000000000UL + current_time.tv_nsec); } /* Function to handle DTrace traps during probes. See riscv/riscv/trap.c */ int dtrace_trap(struct trapframe *frame, u_int type) { /* * A trap can occur while DTrace executes a probe. Before * executing the probe, DTrace blocks re-scheduling and sets * a flag in it's per-cpu flags to indicate that it doesn't * want to fault. On returning from the probe, the no-fault * flag is cleared and finally re-scheduling is enabled. * * Check if DTrace has enabled 'no-fault' mode: * */ if ((cpu_core[curcpu].cpuc_dtrace_flags & CPU_DTRACE_NOFAULT) != 0) { /* * There are only a couple of trap types that are expected. * All the rest will be handled in the usual way. */ switch (type) { - case EXCP_LOAD_ACCESS_FAULT: - case EXCP_STORE_ACCESS_FAULT: - case EXCP_INSTR_ACCESS_FAULT: + case EXCP_FAULT_LOAD: + case EXCP_FAULT_STORE: + case EXCP_FAULT_FETCH: /* Flag a bad address. */ cpu_core[curcpu].cpuc_dtrace_flags |= CPU_DTRACE_BADADDR; cpu_core[curcpu].cpuc_dtrace_illval = 0; /* * Offset the instruction pointer to the instruction * following the one causing the fault. */ frame->tf_sepc += 4; return (1); default: /* Handle all other traps in the usual way. */ break; } } /* Handle the trap in the usual way. */ return (0); } void dtrace_probe_error(dtrace_state_t *state, dtrace_epid_t epid, int which, int fault, int fltoffs, uintptr_t illval) { dtrace_probe(dtrace_probeid_error, (uint64_t)(uintptr_t)state, (uintptr_t)epid, (uintptr_t)which, (uintptr_t)fault, (uintptr_t)fltoffs); } static int dtrace_invop_start(struct trapframe *frame) { int data, invop, reg, update_sp; register_t arg1, arg2; register_t *sp; uint32_t imm; InstFmt i; int offs; int tmp; invop = dtrace_invop(frame->tf_sepc, frame, frame->tf_sepc); if (invop == RISCV_INSN_RET) { frame->tf_sepc = frame->tf_ra; return (0); } if ((invop & SD_RA_SP_MASK) == SD_RA_SP) { i.word = invop; imm = i.SType.imm0_4 | (i.SType.imm5_11 << 5); sp = (register_t *)((uint8_t *)frame->tf_sp + imm); *sp = frame->tf_ra; frame->tf_sepc += INSN_SIZE; return (0); } return (-1); } void dtrace_invop_init(void) { dtrace_invop_jump_addr = dtrace_invop_start; } void dtrace_invop_uninit(void) { dtrace_invop_jump_addr = 0; } Index: head/sys/conf/ldscript.riscv =================================================================== --- head/sys/conf/ldscript.riscv (revision 303659) +++ head/sys/conf/ldscript.riscv (revision 303660) @@ -1,135 +1,135 @@ /* $FreeBSD$ */ OUTPUT_ARCH(riscv) ENTRY(_start) SEARCH_DIR(/usr/lib); SECTIONS { /* Read-only sections, merged into text segment: */ - . = kernbase + 0x100; + . = kernbase + 0x80000000 /* KERNENTRY */; .text : AT(ADDR(.text) - kernbase) { *(.text) *(.stub) /* .gnu.warning sections are handled specially by elf32.em. */ *(.gnu.warning) *(.gnu.linkonce.t*) } =0x9090 _etext = .; PROVIDE (etext = .); .fini : { *(.fini) } =0x9090 .rodata : { *(.rodata) *(.gnu.linkonce.r*) } .rodata1 : { *(.rodata1) } .interp : { *(.interp) } .hash : { *(.hash) } .dynsym : { *(.dynsym) } .dynstr : { *(.dynstr) } .gnu.version : { *(.gnu.version) } .gnu.version_d : { *(.gnu.version_d) } .gnu.version_r : { *(.gnu.version_r) } .rel.text : { *(.rel.text) *(.rel.gnu.linkonce.t*) } .rela.text : { *(.rela.text) *(.rela.gnu.linkonce.t*) } .rel.data : { *(.rel.data) *(.rel.gnu.linkonce.d*) } .rela.data : { *(.rela.data) *(.rela.gnu.linkonce.d*) } .rel.rodata : { *(.rel.rodata) *(.rel.gnu.linkonce.r*) } .rela.rodata : { *(.rela.rodata) *(.rela.gnu.linkonce.r*) } .rel.got : { *(.rel.got) } .rela.got : { *(.rela.got) } .rel.ctors : { *(.rel.ctors) } .rela.ctors : { *(.rela.ctors) } .rel.dtors : { *(.rel.dtors) } .rela.dtors : { *(.rela.dtors) } .rel.init : { *(.rel.init) } .rela.init : { *(.rela.init) } .rel.fini : { *(.rel.fini) } .rela.fini : { *(.rela.fini) } .rel.bss : { *(.rel.bss) } .rela.bss : { *(.rela.bss) } .rel.plt : { *(.rel.plt) } .rela.plt : { *(.rela.plt) } .init : { *(.init) } =0x9090 .plt : { *(.plt) } /* Adjust the address for the data segment. We want to adjust up to the same address within the page on the next page up. */ . = ALIGN(0x1000) + (. & (0x1000 - 1)) ; .data : { *(.data) *(.gnu.linkonce.d*) } .data1 : { *(.data1) } . = ALIGN(32 / 8); _start_ctors = .; PROVIDE (start_ctors = .); .ctors : { *(.ctors) } _stop_ctors = .; PROVIDE (stop_ctors = .); .dtors : { *(.dtors) } .got : { *(.got.plt) *(.got) } .dynamic : { *(.dynamic) } /* We want the small data sections together, so single-instruction offsets can access them all, and initialized data all before uninitialized, so we can shorten the on-disk segment size. */ . = ALIGN(8); .sdata : { *(.sdata) } _edata = .; PROVIDE (edata = .); __bss_start = .; .sbss : { *(.sbss) *(.scommon) } .bss : { *(.dynbss) *(.bss) *(COMMON) } . = ALIGN(8); _end = . ; PROVIDE (end = .); /* Stabs debugging sections. */ .stab 0 : { *(.stab) } .stabstr 0 : { *(.stabstr) } .stab.excl 0 : { *(.stab.excl) } .stab.exclstr 0 : { *(.stab.exclstr) } .stab.index 0 : { *(.stab.index) } .stab.indexstr 0 : { *(.stab.indexstr) } .comment 0 : { *(.comment) } /* DWARF debug sections. Symbols in the DWARF debugging sections are relative to the beginning of the section so we begin them at 0. */ /* DWARF 1 */ .debug 0 : { *(.debug) } .line 0 : { *(.line) } /* GNU DWARF 1 extensions */ .debug_srcinfo 0 : { *(.debug_srcinfo) } .debug_sfnames 0 : { *(.debug_sfnames) } /* DWARF 1.1 and DWARF 2 */ .debug_aranges 0 : { *(.debug_aranges) } .debug_pubnames 0 : { *(.debug_pubnames) } /* DWARF 2 */ .debug_info 0 : { *(.debug_info) } .debug_abbrev 0 : { *(.debug_abbrev) } .debug_line 0 : { *(.debug_line) } .debug_frame 0 : { *(.debug_frame) } .debug_str 0 : { *(.debug_str) } .debug_loc 0 : { *(.debug_loc) } .debug_macinfo 0 : { *(.debug_macinfo) } /* SGI/MIPS DWARF 2 extensions */ .debug_weaknames 0 : { *(.debug_weaknames) } .debug_funcnames 0 : { *(.debug_funcnames) } .debug_typenames 0 : { *(.debug_typenames) } .debug_varnames 0 : { *(.debug_varnames) } /* These must appear regardless of . */ } Index: head/sys/riscv/conf/GENERIC =================================================================== --- head/sys/riscv/conf/GENERIC (revision 303659) +++ head/sys/riscv/conf/GENERIC (revision 303660) @@ -1,112 +1,113 @@ # # GENERIC -- Generic kernel configuration file for FreeBSD/RISC-V # # For more information on this file, please read the config(5) manual page, # and/or the handbook section on Kernel Configuration Files: # # http://www.FreeBSD.org/doc/en_US.ISO8859-1/books/handbook/kernelconfig-config.html # # The handbook is also available locally in /usr/share/doc/handbook # if you've installed the doc distribution, otherwise always see the # FreeBSD World Wide Web server (http://www.FreeBSD.org/) for the # latest information. # # An exhaustive list of options and more detailed explanations of the # device lines is also present in the ../../conf/NOTES and NOTES files. # If you are in doubt as to the purpose or necessity of a line, check first # in NOTES. # # $FreeBSD$ cpu RISCV ident GENERIC makeoptions DEBUG=-g # Build kernel with gdb(1) debug symbols # makeoptions WITH_CTF=1 # Run ctfconvert(1) for DTrace support # FIXME: linker error. "--relax and -r may not be used together" makeoptions WITHOUT_MODULES="usb otusfw mwlfw ispfw mwlfw ralfw rtwnfw urtwnfw" +# makeoptions NO_MODULES options SCHED_ULE # ULE scheduler options PREEMPTION # Enable kernel thread preemption options INET # InterNETworking options INET6 # IPv6 communications protocols options IPSEC # IP (v4/v6) security options TCP_OFFLOAD # TCP offload options SCTP # Stream Control Transmission Protocol options FFS # Berkeley Fast Filesystem options SOFTUPDATES # Enable FFS soft updates support options UFS_ACL # Support for access control lists options UFS_DIRHASH # Improve performance on big directories options UFS_GJOURNAL # Enable gjournal-based UFS journaling options QUOTA # Enable disk quotas for UFS -options MD_ROOT # MD is a potential root device options NFSCL # Network Filesystem Client options NFSD # Network Filesystem Server options NFSLOCKD # Network Lock Manager options NFS_ROOT # NFS usable as /, requires NFSCL options MSDOSFS # MSDOS Filesystem options CD9660 # ISO 9660 Filesystem options PROCFS # Process filesystem (requires PSEUDOFS) options PSEUDOFS # Pseudo-filesystem framework options GEOM_PART_GPT # GUID Partition Tables. # options GEOM_RAID # Soft RAID functionality. options GEOM_LABEL # Provides labelization options SCSI_DELAY=5000 # Delay (in ms) before probing SCSI options KTRACE # ktrace(1) support # options STACK # stack(9) support options SYSVSHM # SYSV-style shared memory options SYSVMSG # SYSV-style message queues options SYSVSEM # SYSV-style semaphores options _KPOSIX_PRIORITY_SCHEDULING # POSIX P1003_1B real-time extensions options PRINTF_BUFR_SIZE=128 # Prevent printf output being interspersed. options KBD_INSTALL_CDEV # install a CDEV entry in /dev # options HWPMC_HOOKS # Necessary kernel hooks for hwpmc(4) options AUDIT # Security event auditing options CAPABILITY_MODE # Capsicum capability mode options CAPABILITIES # Capsicum capabilities options MAC # TrustedBSD MAC Framework options KDTRACE_FRAME # Ensure frames are compiled in options KDTRACE_HOOKS # Kernel DTrace hooks # options VFP # Floating-point support options RACCT # Resource accounting framework options RACCT_DEFAULT_TO_DISABLED # Set kern.racct.enable=0 by default options RCTL # Resource limits options SMP # Uncomment for memory disk # options MD_ROOT # options MD_ROOT_SIZE=8192 # 8MB ram disk # makeoptions MFS_IMAGE=/path/to/img # options ROOTDEVNAME=\"ufs:/dev/md0\" # Debugging support. Always need this: options KDB # Enable kernel debugger support. options KDB_TRACE # Print a stack trace for a panic. # For full debugger support use (turn off in stable branch): options DDB # Support DDB. # options GDB # Support remote GDB. options DEADLKRES # Enable the deadlock resolver options INVARIANTS # Enable calls of extra sanity checking options INVARIANT_SUPPORT # Extra sanity checks of internal structures, required by INVARIANTS # options WITNESS # Enable checks to detect deadlocks and cycles # options WITNESS_SKIPSPIN # Don't run witness on spinlocks for speed options MALLOC_DEBUG_MAXZONES=8 # Separate malloc(9) zones # options EARLY_PRINTF +# options VERBOSE_SYSINIT # Pseudo devices. device loop # Network loopback device random # Entropy device device ether # Ethernet support device vlan # 802.1Q VLAN support device tun # Packet tunnel. device md # Memory "disks" device gif # IPv6 and IPv4 tunneling device firmware # firmware assist module # The `bpf' device enables the Berkeley Packet Filter. # Be aware of the administrative consequences of enabling this! # Note that 'bpf' is required for DHCP. device bpf # Berkeley packet filter options FDT Index: head/sys/riscv/htif/htif.c =================================================================== --- head/sys/riscv/htif/htif.c (revision 303659) +++ head/sys/riscv/htif/htif.c (revision 303660) @@ -1,283 +1,278 @@ /*- * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "htif.h" static struct resource_spec htif_spec[] = { { SYS_RES_IRQ, 0, RF_ACTIVE | RF_SHAREABLE}, { -1, 0 } }; struct intr_entry { void (*func) (void *, uint64_t); void *arg; }; struct intr_entry intrs[HTIF_NDEV]; uint64_t htif_command(uint64_t arg) { return (machine_command(ECALL_HTIF_CMD, arg)); } int htif_setup_intr(int id, void *func, void *arg) { if (id >= HTIF_NDEV) return (-1); intrs[id].func = func; intrs[id].arg = arg; return (0); } static void htif_handle_entry(struct htif_softc *sc) { uint64_t entry; uint8_t devcmd; uint8_t devid; entry = machine_command(ECALL_HTIF_GET_ENTRY, 0); while (entry) { devid = HTIF_DEV_ID(entry); devcmd = HTIF_DEV_CMD(entry); if (devcmd == HTIF_CMD_IDENTIFY) { /* Enumeration interrupt */ if (devid == sc->identify_id) sc->identify_done = 1; } else { /* Device interrupt */ if (intrs[devid].func != NULL) intrs[devid].func(intrs[devid].arg, entry); } entry = machine_command(ECALL_HTIF_GET_ENTRY, 0); } } static int htif_intr(void *arg) { struct htif_softc *sc; sc = arg; csr_clear(sip, SIP_SSIP); htif_handle_entry(sc); return (FILTER_HANDLED); } static int htif_add_device(struct htif_softc *sc, int i, char *id, char *name) { struct htif_dev_ivars *di; di = malloc(sizeof(struct htif_dev_ivars), M_DEVBUF, M_WAITOK | M_ZERO); di->sc = sc; di->index = i; di->id = malloc(HTIF_ID_LEN, M_DEVBUF, M_WAITOK | M_ZERO); memcpy(di->id, id, HTIF_ID_LEN); di->dev = device_add_child(sc->dev, name, -1); device_set_ivars(di->dev, di); return (0); } static int htif_enumerate(struct htif_softc *sc) { char id[HTIF_ID_LEN] __aligned(HTIF_ALIGN); uint64_t paddr; uint64_t data; uint64_t cmd; int len; int i; device_printf(sc->dev, "Enumerating devices\n"); for (i = 0; i < HTIF_NDEV; i++) { paddr = pmap_kextract((vm_offset_t)&id); data = (paddr << IDENTIFY_PADDR_SHIFT); data |= IDENTIFY_IDENT; sc->identify_id = i; sc->identify_done = 0; cmd = i; cmd <<= HTIF_DEV_ID_SHIFT; cmd |= (HTIF_CMD_IDENTIFY << HTIF_CMD_SHIFT); cmd |= data; htif_command(cmd); - /* Do poll as interrupts are disabled yet */ - while (sc->identify_done == 0) { - htif_handle_entry(sc); - } - len = strnlen(id, sizeof(id)); if (len <= 0) break; if (bootverbose) printf(" %d %s\n", i, id); if (strncmp(id, "disk", 4) == 0) htif_add_device(sc, i, id, "htif_blk"); else if (strncmp(id, "bcd", 3) == 0) htif_add_device(sc, i, id, "htif_console"); else if (strncmp(id, "syscall_proxy", 13) == 0) htif_add_device(sc, i, id, "htif_syscall_proxy"); } return (bus_generic_attach(sc->dev)); } int htif_read_ivar(device_t dev, device_t child, int which, uintptr_t *result) { struct htif_dev_ivars *ivars; ivars = device_get_ivars(child); switch (which) { case HTIF_IVAR_INDEX: *result = ivars->index; break; case HTIF_IVAR_ID: *result = (uintptr_t)ivars->id; default: return (EINVAL); } return (0); } static int htif_probe(device_t dev) { if (!ofw_bus_status_okay(dev)) return (ENXIO); if (!ofw_bus_is_compatible(dev, "riscv,htif")) return (ENXIO); device_set_desc(dev, "HTIF bus device"); return (BUS_PROBE_DEFAULT); } static int htif_attach(device_t dev) { struct htif_softc *sc; int error; sc = device_get_softc(dev); sc->dev = dev; if (bus_alloc_resources(dev, htif_spec, sc->res)) { device_printf(dev, "could not allocate resources\n"); return (ENXIO); } /* Setup IRQs handler */ error = bus_setup_intr(dev, sc->res[0], INTR_TYPE_CLK, htif_intr, NULL, sc, &sc->ihl[0]); if (error) { device_printf(dev, "Unable to alloc int resource.\n"); return (ENXIO); } csr_set(sie, SIE_SSIE); return (htif_enumerate(sc)); } static device_method_t htif_methods[] = { DEVMETHOD(device_probe, htif_probe), DEVMETHOD(device_attach, htif_attach), /* Bus interface */ DEVMETHOD(bus_read_ivar, htif_read_ivar), DEVMETHOD_END }; static driver_t htif_driver = { "htif", htif_methods, sizeof(struct htif_softc) }; static devclass_t htif_devclass; DRIVER_MODULE(htif, simplebus, htif_driver, htif_devclass, 0, 0); Index: head/sys/riscv/htif/htif_block.c =================================================================== --- head/sys/riscv/htif/htif_block.c (revision 303659) +++ head/sys/riscv/htif/htif_block.c (revision 303660) @@ -1,297 +1,299 @@ /*- * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "htif.h" #define SECTOR_SIZE_SHIFT (9) #define SECTOR_SIZE (1 << SECTOR_SIZE_SHIFT) #define HTIF_BLK_LOCK(_sc) mtx_lock(&(_sc)->sc_mtx) #define HTIF_BLK_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_mtx) #define HTIF_BLK_LOCK_INIT(_sc) \ mtx_init(&_sc->sc_mtx, device_get_nameunit(_sc->dev), \ "htif_blk", MTX_DEF) #define HTIF_BLK_LOCK_DESTROY(_sc) mtx_destroy(&_sc->sc_mtx); #define HTIF_BLK_ASSERT_LOCKED(_sc) mtx_assert(&_sc->sc_mtx, MA_OWNED); #define HTIF_BLK_ASSERT_UNLOCKED(_sc) mtx_assert(&_sc->sc_mtx, MA_NOTOWNED); static void htif_blk_task(void *arg); static disk_open_t htif_blk_open; static disk_close_t htif_blk_close; static disk_strategy_t htif_blk_strategy; struct htif_blk_softc { device_t dev; struct disk *disk; struct mtx htif_io_mtx; struct mtx sc_mtx; struct proc *p; struct bio_queue_head bio_queue; int running; int intr_chan; int cmd_done; int index; uint16_t curtag; }; struct htif_blk_request { uint64_t addr; uint64_t offset; /* offset in bytes */ uint64_t size; /* length in bytes */ uint64_t tag; }; static void htif_blk_intr(void *arg, uint64_t entry) { struct htif_blk_softc *sc; uint64_t devcmd; uint64_t data; sc = arg; devcmd = HTIF_DEV_CMD(entry); data = HTIF_DEV_DATA(entry); if (sc->curtag == data) { wmb(); sc->cmd_done = 1; wakeup(&sc->intr_chan); } else { device_printf(sc->dev, "Unexpected tag %d (should be %d)\n", data, sc->curtag); } } static int htif_blk_probe(device_t dev) { return (0); } static int htif_blk_attach(device_t dev) { struct htif_blk_softc *sc; char prefix[] = " size="; char *str; long size; sc = device_get_softc(dev); sc->dev = dev; mtx_init(&sc->htif_io_mtx, device_get_nameunit(dev), "htif_blk", MTX_DEF); HTIF_BLK_LOCK_INIT(sc); str = strstr(htif_get_id(dev), prefix); size = strtol((str + 6), NULL, 10); if (size == 0) { return (ENXIO); } sc->index = htif_get_index(dev); if (sc->index < 0) return (EINVAL); htif_setup_intr(sc->index, htif_blk_intr, sc); sc->disk = disk_alloc(); sc->disk->d_drv1 = sc; sc->disk->d_maxsize = 4096; /* Max transfer */ sc->disk->d_name = "htif_blk"; sc->disk->d_open = htif_blk_open; sc->disk->d_close = htif_blk_close; sc->disk->d_strategy = htif_blk_strategy; sc->disk->d_unit = 0; sc->disk->d_sectorsize = SECTOR_SIZE; sc->disk->d_mediasize = size; disk_create(sc->disk, DISK_VERSION); bioq_init(&sc->bio_queue); sc->running = 1; kproc_create(&htif_blk_task, sc, &sc->p, 0, 0, "%s: transfer", device_get_nameunit(dev)); return (0); } static int htif_blk_open(struct disk *dp) { return (0); } static int htif_blk_close(struct disk *dp) { return (0); } static void htif_blk_task(void *arg) { struct htif_blk_request req __aligned(HTIF_ALIGN); struct htif_blk_softc *sc; uint64_t req_paddr; struct bio *bp; uint64_t paddr; + uint64_t resp; uint64_t cmd; int i; sc = (struct htif_blk_softc *)arg; while (1) { HTIF_BLK_LOCK(sc); do { bp = bioq_takefirst(&sc->bio_queue); if (bp == NULL) msleep(sc, &sc->sc_mtx, PRIBIO, "jobqueue", 0); } while (bp == NULL); HTIF_BLK_UNLOCK(sc); if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) { HTIF_BLK_LOCK(sc); rmb(); req.offset = (bp->bio_pblkno * sc->disk->d_sectorsize); req.size = bp->bio_bcount; paddr = vtophys(bp->bio_data); KASSERT(paddr != 0, ("paddr is 0")); req.addr = paddr; sc->curtag++; req.tag = sc->curtag; cmd = sc->index; cmd <<= HTIF_DEV_ID_SHIFT; if (bp->bio_cmd == BIO_READ) cmd |= (HTIF_CMD_READ << HTIF_CMD_SHIFT); else cmd |= (HTIF_CMD_WRITE << HTIF_CMD_SHIFT); req_paddr = vtophys(&req); KASSERT(req_paddr != 0, ("req_paddr is 0")); cmd |= req_paddr; sc->cmd_done = 0; - htif_command(cmd); + resp = htif_command(cmd); + htif_blk_intr(sc, resp); /* Wait for interrupt */ i = 0; while (sc->cmd_done == 0) { msleep(&sc->intr_chan, &sc->sc_mtx, PRIBIO, "intr", hz/2); if (i++ > 2) { /* TODO: try to re-issue operation on timeout ? */ bp->bio_error = EIO; bp->bio_flags |= BIO_ERROR; disk_err(bp, "hard error", -1, 1); break; } } HTIF_BLK_UNLOCK(sc); biodone(bp); } else { printf("unknown op %d\n", bp->bio_cmd); } } } static void htif_blk_strategy(struct bio *bp) { struct htif_blk_softc *sc; sc = bp->bio_disk->d_drv1; HTIF_BLK_LOCK(sc); if (sc->running > 0) { bioq_disksort(&sc->bio_queue, bp); HTIF_BLK_UNLOCK(sc); wakeup(sc); } else { HTIF_BLK_UNLOCK(sc); biofinish(bp, NULL, ENXIO); } } static device_method_t htif_blk_methods[] = { DEVMETHOD(device_probe, htif_blk_probe), DEVMETHOD(device_attach, htif_blk_attach), }; static driver_t htif_blk_driver = { "htif_blk", htif_blk_methods, sizeof(struct htif_blk_softc) }; static devclass_t htif_blk_devclass; DRIVER_MODULE(htif_blk, htif, htif_blk_driver, htif_blk_devclass, 0, 0); Index: head/sys/riscv/htif/htif_console.c =================================================================== --- head/sys/riscv/htif/htif_console.c (revision 303659) +++ head/sys/riscv/htif/htif_console.c (revision 303660) @@ -1,396 +1,349 @@ /*- * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "htif.h" #include #include -extern uint64_t console_intr; - static tsw_outwakeup_t riscvtty_outwakeup; static struct ttydevsw riscv_ttydevsw = { .tsw_flags = TF_NOPREFIX, .tsw_outwakeup = riscvtty_outwakeup, }; static int polltime; static struct callout riscv_callout; static struct tty *tp = NULL; #if defined(KDB) static int alt_break_state; #endif static void riscv_timeout(void *); static cn_probe_t riscv_cnprobe; static cn_init_t riscv_cninit; static cn_term_t riscv_cnterm; static cn_getc_t riscv_cngetc; static cn_putc_t riscv_cnputc; static cn_grab_t riscv_cngrab; static cn_ungrab_t riscv_cnungrab; CONSOLE_DRIVER(riscv); #define MAX_BURST_LEN 1 #define QUEUE_SIZE 256 #define CONSOLE_DEFAULT_ID 1ul #define SPIN_IN_MACHINE_MODE 1 struct queue_entry { uint64_t data; uint64_t used; struct queue_entry *next; }; struct queue_entry cnqueue[QUEUE_SIZE]; struct queue_entry *entry_last; struct queue_entry *entry_served; static void -htif_putc(int c) +riscv_putc(int c) { uint64_t cmd; cmd = (HTIF_CMD_WRITE << HTIF_CMD_SHIFT); cmd |= (CONSOLE_DEFAULT_ID << HTIF_DEV_ID_SHIFT); cmd |= c; -#ifdef SPIN_IN_MACHINE_MODE - machine_command(ECALL_HTIF_LOWPUTC, cmd); -#else - htif_command(cmd); -#endif - + machine_command(ECALL_HTIF_CMD, cmd); } -static uint8_t -htif_getc(void) -{ - uint64_t cmd; - uint8_t res; - - cmd = (HTIF_CMD_READ << HTIF_CMD_SHIFT); - cmd |= (CONSOLE_DEFAULT_ID << HTIF_DEV_ID_SHIFT); - - res = htif_command(cmd); - - return (res); -} - -static void -riscv_putc(int c) -{ - uint64_t counter; - uint64_t *cc; - uint64_t val; - - val = 0; - counter = 0; - - cc = (uint64_t*)&console_intr; - *cc = 0; - - htif_putc(c); - -#ifndef SPIN_IN_MACHINE_MODE - /* Wait for an interrupt */ - __asm __volatile( - "li %0, 1\n" /* counter = 1 */ - "slli %0, %0, 12\n" /* counter <<= 12 */ - "1:" - "addi %0, %0, -1\n" /* counter -= 1 */ - "beqz %0, 2f\n" /* counter == 0 ? finish */ - "ld %1, 0(%2)\n" /* val = *cc */ - "beqz %1, 1b\n" /* val == 0 ? repeat */ - "2:" - : "=&r"(counter), "=&r"(val) : "r"(cc) - ); -#endif -} - #ifdef EARLY_PRINTF early_putc_t *early_putc = riscv_putc; #endif static void cn_drvinit(void *unused) { if (riscv_consdev.cn_pri != CN_DEAD && riscv_consdev.cn_name[0] != '\0') { tp = tty_alloc(&riscv_ttydevsw, NULL); tty_init_console(tp, 0); tty_makedev(tp, NULL, "%s", "rcons"); polltime = 1; callout_init(&riscv_callout, 1); callout_reset(&riscv_callout, polltime, riscv_timeout, NULL); } } SYSINIT(cndev, SI_SUB_CONFIGURE, SI_ORDER_MIDDLE, cn_drvinit, NULL); static void riscvtty_outwakeup(struct tty *tp) { u_char buf[MAX_BURST_LEN]; int len; int i; for (;;) { len = ttydisc_getc(tp, buf, sizeof(buf)); if (len == 0) break; KASSERT(len == 1, ("tty error")); for (i = 0; i < len; i++) riscv_putc(buf[i]); } } static void riscv_timeout(void *v) { int c; tty_lock(tp); while ((c = riscv_cngetc(NULL)) != -1) ttydisc_rint(tp, c, 0); ttydisc_rint_done(tp); tty_unlock(tp); callout_reset(&riscv_callout, polltime, riscv_timeout, NULL); } static void riscv_cnprobe(struct consdev *cp) { cp->cn_pri = CN_NORMAL; } static void riscv_cninit(struct consdev *cp) { int i; strcpy(cp->cn_name, "rcons"); for (i = 0; i < QUEUE_SIZE; i++) { if (i == (QUEUE_SIZE - 1)) cnqueue[i].next = &cnqueue[0]; else cnqueue[i].next = &cnqueue[i+1]; cnqueue[i].data = 0; cnqueue[i].used = 0; } entry_last = &cnqueue[0]; entry_served = &cnqueue[0]; } static void riscv_cnterm(struct consdev *cp) { } static void riscv_cngrab(struct consdev *cp) { } static void riscv_cnungrab(struct consdev *cp) { } static int riscv_cngetc(struct consdev *cp) { #if defined(KDB) uint64_t devcmd; uint64_t entry; uint64_t devid; #endif + uint64_t cmd; uint8_t data; int ch; - htif_getc(); + cmd = (HTIF_CMD_READ << HTIF_CMD_SHIFT); + cmd |= (CONSOLE_DEFAULT_ID << HTIF_DEV_ID_SHIFT); + machine_command(ECALL_HTIF_CMD_REQ, cmd); + #if defined(KDB) if (kdb_active) { - entry = machine_command(ECALL_HTIF_GET_ENTRY, 0); + + entry = machine_command(ECALL_HTIF_CMD_RESP, 0); while (entry) { devid = HTIF_DEV_ID(entry); devcmd = HTIF_DEV_CMD(entry); data = HTIF_DEV_DATA(entry); if (devid == CONSOLE_DEFAULT_ID && devcmd == 0) { entry_last->data = data; entry_last->used = 1; entry_last = entry_last->next; } else { printf("Lost interrupt: devid %d\n", devid); } - entry = machine_command(ECALL_HTIF_GET_ENTRY, 0); + entry = machine_command(ECALL_HTIF_CMD_RESP, 0); } } #endif if (entry_served->used == 1) { data = entry_served->data; entry_served->used = 0; entry_served = entry_served->next; ch = (data & 0xff); if (ch > 0 && ch < 0xff) { #if defined(KDB) kdb_alt_break(ch, &alt_break_state); #endif return (ch); } } return (-1); } static void riscv_cnputc(struct consdev *cp, int c) { riscv_putc(c); } /* * Bus interface. */ struct htif_console_softc { device_t dev; int running; int intr_chan; int cmd_done; int curtag; int index; }; static void htif_console_intr(void *arg, uint64_t entry) { struct htif_console_softc *sc; uint8_t devcmd; uint64_t data; sc = arg; devcmd = HTIF_DEV_CMD(entry); data = HTIF_DEV_DATA(entry); if (devcmd == 0) { entry_last->data = data; entry_last->used = 1; entry_last = entry_last->next; } } static int htif_console_probe(device_t dev) { return (0); } static int htif_console_attach(device_t dev) { struct htif_console_softc *sc; sc = device_get_softc(dev); sc->dev = dev; sc->index = htif_get_index(dev); if (sc->index < 0) return (EINVAL); htif_setup_intr(sc->index, htif_console_intr, sc); return (0); } static device_method_t htif_console_methods[] = { DEVMETHOD(device_probe, htif_console_probe), DEVMETHOD(device_attach, htif_console_attach), DEVMETHOD_END }; static driver_t htif_console_driver = { "htif_console", htif_console_methods, sizeof(struct htif_console_softc) }; static devclass_t htif_console_devclass; DRIVER_MODULE(htif_console, htif, htif_console_driver, htif_console_devclass, 0, 0); Index: head/sys/riscv/include/cpu.h =================================================================== --- head/sys/riscv/include/cpu.h (revision 303659) +++ head/sys/riscv/include/cpu.h (revision 303660) @@ -1,95 +1,94 @@ /*- - * Copyright (c) 2015 Ruslan Bukin + * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_CPU_H_ #define _MACHINE_CPU_H_ #include #include #define TRAPF_PC(tfp) ((tfp)->tf_ra) #define TRAPF_USERMODE(tfp) (((tfp)->tf_sepc & (1ul << 63)) == 0) #define cpu_getstack(td) ((td)->td_frame->tf_sp) #define cpu_setstack(td, sp) ((td)->td_frame->tf_sp = (sp)) #define cpu_spinwait() /* nothing */ #ifdef _KERNEL /* * 0x0000 CPU ID unimplemented * 0x0001 UC Berkeley Rocket repo * 0x0002­0x7FFE Reserved for open-source repos * 0x7FFF Reserved for extension * 0x8000 Reserved for anonymous source * 0x8001­0xFFFE Reserved for proprietary implementations * 0xFFFF Reserved for extension */ #define CPU_IMPL_SHIFT 0 #define CPU_IMPL_MASK (0xffff << CPU_IMPL_SHIFT) #define CPU_IMPL(mimpid) ((mimpid & CPU_IMPL_MASK) >> CPU_IMPL_SHIFT) #define CPU_IMPL_UNIMPLEMEN 0x0 #define CPU_IMPL_UCB_ROCKET 0x1 #define CPU_PART_SHIFT 62 #define CPU_PART_MASK (0x3ul << CPU_PART_SHIFT) -#define CPU_PART(mcpuid) ((mcpuid & CPU_PART_MASK) >> CPU_PART_SHIFT) -#define CPU_PART_RV32I 0x0 -#define CPU_PART_RV32E 0x1 -#define CPU_PART_RV64I 0x2 -#define CPU_PART_RV128I 0x3 +#define CPU_PART(misa) ((misa & CPU_PART_MASK) >> CPU_PART_SHIFT) +#define CPU_PART_RV32 0x1 +#define CPU_PART_RV64 0x2 +#define CPU_PART_RV128 0x3 extern char btext[]; extern char etext[]; void cpu_halt(void) __dead2; void cpu_reset(void) __dead2; void fork_trampoline(void); void identify_cpu(void); void swi_vm(void *v); static __inline uint64_t get_cyclecount(void) { /* TODO: This is bogus */ return (1); } #endif #endif /* !_MACHINE_CPU_H_ */ Index: head/sys/riscv/include/cpufunc.h =================================================================== --- head/sys/riscv/include/cpufunc.h (revision 303659) +++ head/sys/riscv/include/cpufunc.h (revision 303660) @@ -1,123 +1,124 @@ /*- - * Copyright (c) 2015 Ruslan Bukin + * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_CPUFUNC_H_ #define _MACHINE_CPUFUNC_H_ #ifdef _KERNEL #include static __inline void breakpoint(void) { __asm("ebreak"); } static __inline register_t intr_disable(void) { uint64_t ret; __asm __volatile( - "csrrci %0, sstatus, 1" - : "=&r" (ret) + "csrrci %0, sstatus, %1" + : "=&r" (ret) : "i" (SSTATUS_SIE) ); - return (ret & SSTATUS_IE); + return (ret & (SSTATUS_SIE)); } static __inline void intr_restore(register_t s) { __asm __volatile( "csrs sstatus, %0" :: "r" (s) ); } static __inline void intr_enable(void) { __asm __volatile( - "csrsi sstatus, 1" + "csrsi sstatus, %0" + :: "i" (SSTATUS_SIE) ); } static __inline register_t machine_command(uint64_t cmd, uint64_t arg) { uint64_t res; __asm __volatile( "mv t5, %2\n" "mv t6, %1\n" "ecall\n" "mv %0, t6" : "=&r"(res) : "r"(arg), "r"(cmd) ); return (res); } #define cpu_nullop() riscv_nullop() #define cpufunc_nullop() riscv_nullop() #define cpu_setttb(a) riscv_setttb(a) #define cpu_tlb_flushID() riscv_tlb_flushID() #define cpu_tlb_flushID_SE(e) riscv_tlb_flushID_SE(e) #define cpu_dcache_wbinv_range(a, s) riscv_dcache_wbinv_range((a), (s)) #define cpu_dcache_inv_range(a, s) riscv_dcache_inv_range((a), (s)) #define cpu_dcache_wb_range(a, s) riscv_dcache_wb_range((a), (s)) #define cpu_idcache_wbinv_range(a, s) riscv_idcache_wbinv_range((a), (s)) #define cpu_icache_sync_range(a, s) riscv_icache_sync_range((a), (s)) void riscv_nullop(void); void riscv_setttb(vm_offset_t); void riscv_tlb_flushID(void); void riscv_tlb_flushID_SE(vm_offset_t); void riscv_icache_sync_range(vm_offset_t, vm_size_t); void riscv_idcache_wbinv_range(vm_offset_t, vm_size_t); void riscv_dcache_wbinv_range(vm_offset_t, vm_size_t); void riscv_dcache_inv_range(vm_offset_t, vm_size_t); void riscv_dcache_wb_range(vm_offset_t, vm_size_t); #endif /* _KERNEL */ #endif /* _MACHINE_CPUFUNC_H_ */ Index: head/sys/riscv/include/db_machdep.h =================================================================== --- head/sys/riscv/include/db_machdep.h (revision 303659) +++ head/sys/riscv/include/db_machdep.h (revision 303660) @@ -1,91 +1,91 @@ /*- * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_DB_MACHDEP_H_ #define _MACHINE_DB_MACHDEP_H_ #include #include #include -#define T_BREAKPOINT (EXCP_INSTR_BREAKPOINT) +#define T_BREAKPOINT (EXCP_BREAKPOINT) #define T_WATCHPOINT (0) typedef vm_offset_t db_addr_t; typedef long db_expr_t; #define PC_REGS() ((db_addr_t)kdb_thrctx->pcb_sepc) #define BKPT_INST (0x00100073) #define BKPT_SIZE (INSN_SIZE) #define BKPT_SET(inst) (BKPT_INST) #define BKPT_SKIP do { \ kdb_frame->tf_sepc += BKPT_SIZE; \ } while (0) #define db_clear_single_step kdb_cpu_clear_singlestep #define db_set_single_step kdb_cpu_set_singlestep #define IS_BREAKPOINT_TRAP(type, code) (type == T_BREAKPOINT) #define IS_WATCHPOINT_TRAP(type, code) (type == T_WATCHPOINT) #define inst_trap_return(ins) (ins == 0x10000073) /* eret */ #define inst_return(ins) (ins == 0x00008067) /* ret */ #define inst_call(ins) (((ins) & 0x7f) == 111 || \ ((ins) & 0x7f) == 103) /* jal, jalr */ #define inst_load(ins) ({ \ uint32_t tmp_instr = db_get_value(PC_REGS(), sizeof(uint32_t), FALSE); \ is_load_instr(tmp_instr); \ }) #define inst_store(ins) ({ \ uint32_t tmp_instr = db_get_value(PC_REGS(), sizeof(uint32_t), FALSE); \ is_store_instr(tmp_instr); \ }) #define is_load_instr(ins) (((ins) & 0x7f) == 3) #define is_store_instr(ins) (((ins) & 0x7f) == 35) #define next_instr_address(pc, bd) ((bd) ? (pc) : ((pc) + 4)) #define DB_SMALL_VALUE_MAX (0x7fffffff) #define DB_SMALL_VALUE_MIN (-0x40001) #define DB_ELFSIZE 64 #endif /* !_MACHINE_DB_MACHDEP_H_ */ Index: head/sys/riscv/include/intr.h =================================================================== --- head/sys/riscv/include/intr.h (revision 303659) +++ head/sys/riscv/include/intr.h (revision 303660) @@ -1,68 +1,80 @@ /*- * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_INTR_MACHDEP_H_ #define _MACHINE_INTR_MACHDEP_H_ struct trapframe; void riscv_init_interrupts(void); int riscv_teardown_intr(void *); int riscv_config_intr(u_int, enum intr_trigger, enum intr_polarity); int riscv_setup_intr(const char *, driver_filter_t *, driver_intr_t *, void *, int, int, void **); void riscv_cpu_intr(struct trapframe *); typedef unsigned long * riscv_intrcnt_t; riscv_intrcnt_t riscv_intrcnt_create(const char *); void riscv_intrcnt_setname(riscv_intrcnt_t, const char *); #ifdef SMP void riscv_setup_ipihandler(driver_filter_t *); void riscv_unmask_ipi(void); #endif enum { - IRQ_SOFTWARE, - IRQ_TIMER, - IRQ_HTIF, + IRQ_SOFTWARE_USER, + IRQ_SOFTWARE_SUPERVISOR, + IRQ_SOFTWARE_HYPERVISOR, + IRQ_SOFTWARE_MACHINE, + IRQ_TIMER_USER, + IRQ_TIMER_SUPERVISOR, + IRQ_TIMER_HYPERVISOR, + IRQ_TIMER_MACHINE, + IRQ_EXTERNAL_USER, + IRQ_EXTERNAL_SUPERVISOR, + IRQ_EXTERNAL_HYPERVISOR, + IRQ_EXTERNAL_MACHINE, +#if 0 + /* lowRISC TODO */ IRQ_COP, /* lowRISC only */ IRQ_UART, /* lowRISC only */ +#endif NIRQS }; #endif /* !_MACHINE_INTR_MACHDEP_H_ */ Index: head/sys/riscv/include/pte.h =================================================================== --- head/sys/riscv/include/pte.h (revision 303659) +++ head/sys/riscv/include/pte.h (revision 303660) @@ -1,101 +1,90 @@ /*- * Copyright (c) 2014 Andrew Turner - * Copyright (c) 2015 Ruslan Bukin + * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_PTE_H_ #define _MACHINE_PTE_H_ #ifndef LOCORE typedef uint64_t pd_entry_t; /* page directory entry */ typedef uint64_t pt_entry_t; /* page table entry */ typedef uint64_t pn_t; /* page number */ #endif /* Level 0 table, 512GiB per entry */ #define L0_SHIFT 39 /* Level 1 table, 1GiB per entry */ #define L1_SHIFT 30 #define L1_SIZE (1 << L1_SHIFT) #define L1_OFFSET (L1_SIZE - 1) /* Level 2 table, 2MiB per entry */ #define L2_SHIFT 21 #define L2_SIZE (1 << L2_SHIFT) #define L2_OFFSET (L2_SIZE - 1) /* Level 3 table, 4KiB per entry */ #define L3_SHIFT 12 #define L3_SIZE (1 << L3_SHIFT) #define L3_OFFSET (L3_SIZE - 1) #define Ln_ENTRIES (1 << 9) #define Ln_ADDR_MASK (Ln_ENTRIES - 1) /* Bits 9:7 are reserved for software */ -#define PTE_SW_MANAGED (1 << 8) -#define PTE_SW_WIRED (1 << 7) -#define PTE_DIRTY (1 << 6) /* Virtual page is written */ -#define PTE_REF (1 << 5) /* Virtual page is referenced */ -#define PTE_VALID (1 << 0) /* Virtual page is valid */ -#define PTE_TYPE_S 1 -#define PTE_TYPE_M (0xf << PTE_TYPE_S) -#define PTE_TYPE_PTR 0 -#define PTE_TYPE_PTR_G 1 -#define PTE_TYPE_SROURX 2 /* Supervisor read-only, user read-execute page. */ -#define PTE_TYPE_SRWURWX 3 /* Supervisor read-write, user read-write-execute page. */ -#define PTE_TYPE_SURO 4 /* Supervisor and user read-only page. */ -#define PTE_TYPE_SURW 5 /* Supervisor and user read-write page. */ -#define PTE_TYPE_SURX 6 /* Supervisor and user read-execute page. */ -#define PTE_TYPE_SURWX 7 /* Supervisor and User Read Write Execute */ -#define PTE_TYPE_SRO 8 /* Supervisor read-only page. */ -#define PTE_TYPE_SRW 9 /* Supervisor read-write page. */ -#define PTE_TYPE_SRX 10 /* Supervisor read-execute page. */ -#define PTE_TYPE_SRWX 11 /* Supervisor read-write-execute page. */ -#define PTE_TYPE_SRO_G 12 /* Supervisor read-only page--global mapping. */ -#define PTE_TYPE_SRW_G 13 /* Supervisor read-write page--global mapping. */ -#define PTE_TYPE_SRX_G 14 /* Supervisor read-execute page--global mapping. */ -#define PTE_TYPE_SRWX_G 15 /* Supervisor Read Write Execute Global */ +#define PTE_SW_MANAGED (1 << 9) +#define PTE_SW_WIRED (1 << 8) +#define PTE_D (1 << 7) /* Dirty */ +#define PTE_A (1 << 6) /* Accessed */ +#define PTE_G (1 << 5) /* Global */ +#define PTE_U (1 << 4) /* User */ +#define PTE_X (1 << 3) /* Execute */ +#define PTE_W (1 << 2) /* Write */ +#define PTE_R (1 << 1) /* Read */ +#define PTE_V (1 << 0) /* Valid */ +#define PTE_RWX (PTE_R | PTE_W | PTE_X) +#define PTE_RX (PTE_R | PTE_X) #define PTE_PPN0_S 10 #define PTE_PPN1_S 19 #define PTE_PPN2_S 28 #define PTE_PPN3_S 37 #define PTE_SIZE 8 #endif /* !_MACHINE_PTE_H_ */ /* End of pte.h */ Index: head/sys/riscv/include/riscvreg.h =================================================================== --- head/sys/riscv/include/riscvreg.h (revision 303659) +++ head/sys/riscv/include/riscvreg.h (revision 303660) @@ -1,171 +1,210 @@ /*- * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_RISCVREG_H_ #define _MACHINE_RISCVREG_H_ /* Machine mode requests */ #define ECALL_MTIMECMP 0x01 -#define ECALL_CLEAR_PENDING 0x02 -#define ECALL_HTIF_CMD 0x03 -#define ECALL_HTIF_GET_ENTRY 0x04 -#define ECALL_MCPUID_GET 0x05 -#define ECALL_MIMPID_GET 0x06 -#define ECALL_SEND_IPI 0x07 -#define ECALL_CLEAR_IPI 0x08 -#define ECALL_HTIF_LOWPUTC 0x09 -#define ECALL_MIE_SET 0x0a -#define ECALL_IO_IRQ_MASK 0x0b +#define ECALL_HTIF_GET_ENTRY 0x02 +#define ECALL_MCPUID_GET 0x03 +#define ECALL_MIMPID_GET 0x04 +#define ECALL_SEND_IPI 0x05 +#define ECALL_CLEAR_IPI 0x06 +#define ECALL_MIE_SET 0x07 +#define ECALL_IO_IRQ_MASK 0x08 +#define ECALL_HTIF_CMD 0x09 +#define ECALL_HTIF_CMD_REQ 0x0a +#define ECALL_HTIF_CMD_RESP 0x0b #define EXCP_SHIFT 0 #define EXCP_MASK (0xf << EXCP_SHIFT) -#define EXCP_INSTR_ADDR_MISALIGNED 0 -#define EXCP_INSTR_ACCESS_FAULT 1 -#define EXCP_INSTR_ILLEGAL 2 -#define EXCP_INSTR_BREAKPOINT 3 -#define EXCP_LOAD_ADDR_MISALIGNED 4 -#define EXCP_LOAD_ACCESS_FAULT 5 -#define EXCP_STORE_ADDR_MISALIGNED 6 -#define EXCP_STORE_ACCESS_FAULT 7 -#define EXCP_UMODE_ENV_CALL 8 -#define EXCP_SMODE_ENV_CALL 9 -#define EXCP_HMODE_ENV_CALL 10 -#define EXCP_MMODE_ENV_CALL 11 -#define EXCP_INTR (1 << 31) +#define EXCP_MISALIGNED_FETCH 0 +#define EXCP_FAULT_FETCH 1 +#define EXCP_ILLEGAL_INSTRUCTION 2 +#define EXCP_BREAKPOINT 3 +#define EXCP_MISALIGNED_LOAD 4 +#define EXCP_FAULT_LOAD 5 +#define EXCP_MISALIGNED_STORE 6 +#define EXCP_FAULT_STORE 7 +#define EXCP_USER_ECALL 8 +#define EXCP_SUPERVISOR_ECALL 9 +#define EXCP_HYPERVISOR_ECALL 10 +#define EXCP_MACHINE_ECALL 11 +#define EXCP_INTR (1ul << 63) #define EXCP_INTR_SOFTWARE 0 #define EXCP_INTR_TIMER 1 #define EXCP_INTR_HTIF 2 -#define SSTATUS_IE (1 << 0) -#define SSTATUS_PIE (1 << 3) -#define SSTATUS_PS (1 << 4) +#define SSTATUS_UIE (1 << 0) +#define SSTATUS_SIE (1 << 1) +#define SSTATUS_UPIE (1 << 4) +#define SSTATUS_SPIE (1 << 5) +#define SSTATUS_SPIE_SHIFT 5 +#define SSTATUS_SPP (1 << 8) +#define SSTATUS_SPP_SHIFT 8 +#define SSTATUS_FS_MASK 0x3 +#define SSTATUS_FS_SHIFT 13 +#define SSTATUS_XS_MASK 0x3 +#define SSTATUS_XS_SHIFT 15 +#define SSTATUS_PUM (1 << 18) +#define SSTATUS32_SD (1 << 63) +#define SSTATUS64_SD (1 << 31) -#define MSTATUS_MPRV (1 << 16) -#define MSTATUS_PRV_SHIFT 1 -#define MSTATUS_PRV1_SHIFT 4 -#define MSTATUS_PRV2_SHIFT 7 -#define MSTATUS_PRV_MASK (0x3 << MSTATUS_PRV_SHIFT) -#define MSTATUS_PRV_U 0 /* user */ -#define MSTATUS_PRV_S 1 /* supervisor */ -#define MSTATUS_PRV_H 2 /* hypervisor */ -#define MSTATUS_PRV_M 3 /* machine */ +#define MSTATUS_UIE (1 << 0) +#define MSTATUS_SIE (1 << 1) +#define MSTATUS_HIE (1 << 2) +#define MSTATUS_MIE (1 << 3) +#define MSTATUS_UPIE (1 << 4) +#define MSTATUS_SPIE (1 << 5) +#define MSTATUS_SPIE_SHIFT 5 +#define MSTATUS_HPIE (1 << 6) +#define MSTATUS_MPIE (1 << 7) +#define MSTATUS_MPIE_SHIFT 7 +#define MSTATUS_SPP (1 << 8) +#define MSTATUS_SPP_SHIFT 8 +#define MSTATUS_HPP_MASK 0x3 +#define MSTATUS_HPP_SHIFT 9 +#define MSTATUS_MPP_MASK 0x3 +#define MSTATUS_MPP_SHIFT 11 +#define MSTATUS_FS_MASK 0x3 +#define MSTATUS_FS_SHIFT 13 +#define MSTATUS_XS_MASK 0x3 +#define MSTATUS_XS_SHIFT 15 +#define MSTATUS_MPRV (1 << 17) +#define MSTATUS_PUM (1 << 18) +#define MSTATUS_VM_MASK 0x1f +#define MSTATUS_VM_SHIFT 24 +#define MSTATUS_VM_MBARE 0 +#define MSTATUS_VM_MBB 1 +#define MSTATUS_VM_MBBID 2 +#define MSTATUS_VM_SV32 8 +#define MSTATUS_VM_SV39 9 +#define MSTATUS_VM_SV48 10 +#define MSTATUS_VM_SV57 11 +#define MSTATUS_VM_SV64 12 +#define MSTATUS32_SD (1 << 63) +#define MSTATUS64_SD (1 << 31) -#define MSTATUS_VM_SHIFT 17 -#define MSTATUS_VM_MASK 0x1f -#define MSTATUS_VM_MBARE 0 -#define MSTATUS_VM_MBB 1 -#define MSTATUS_VM_MBBID 2 -#define MSTATUS_VM_SV32 8 -#define MSTATUS_VM_SV39 9 -#define MSTATUS_VM_SV48 10 +#define MSTATUS_PRV_U 0 /* user */ +#define MSTATUS_PRV_S 1 /* supervisor */ +#define MSTATUS_PRV_H 2 /* hypervisor */ +#define MSTATUS_PRV_M 3 /* machine */ +#define MIE_USIE (1 << 0) #define MIE_SSIE (1 << 1) #define MIE_HSIE (1 << 2) #define MIE_MSIE (1 << 3) +#define MIE_UTIE (1 << 4) #define MIE_STIE (1 << 5) #define MIE_HTIE (1 << 6) #define MIE_MTIE (1 << 7) +#define MIP_USIP (1 << 0) #define MIP_SSIP (1 << 1) #define MIP_HSIP (1 << 2) #define MIP_MSIP (1 << 3) +#define MIP_UTIP (1 << 4) #define MIP_STIP (1 << 5) #define MIP_HTIP (1 << 6) #define MIP_MTIP (1 << 7) -#define SR_IE (1 << 0) -#define SR_IE1 (1 << 3) -#define SR_IE2 (1 << 6) -#define SR_IE3 (1 << 9) - +#define SIE_USIE (1 << 0) #define SIE_SSIE (1 << 1) +#define SIE_UTIE (1 << 4) #define SIE_STIE (1 << 5) +#define MIP_SEIP (1 << 9) + /* Note: sip register has no SIP_STIP bit in Spike simulator */ #define SIP_SSIP (1 << 1) #define SIP_STIP (1 << 5) +#if 0 +/* lowRISC TODO */ #define NCSRS 4096 #define CSR_IPI 0x783 #define CSR_IO_IRQ 0x7c0 /* lowRISC only? */ +#endif + #define XLEN 8 #define INSN_SIZE 4 #define RISCV_INSN_NOP 0x00000013 #define RISCV_INSN_BREAK 0x00100073 #define RISCV_INSN_RET 0x00008067 #define CSR_ZIMM(val) \ (__builtin_constant_p(val) && ((u_long)(val) < 32)) #define csr_swap(csr, val) \ ({ if (CSR_ZIMM(val)) \ __asm __volatile("csrrwi %0, " #csr ", %1" \ : "=r" (val) : "i" (val)); \ else \ __asm __volatile("csrrw %0, " #csr ", %1" \ : "=r" (val) : "r" (val)); \ val; \ }) #define csr_write(csr, val) \ ({ if (CSR_ZIMM(val)) \ __asm __volatile("csrwi " #csr ", %0" :: "i" (val)); \ else \ __asm __volatile("csrw " #csr ", %0" :: "r" (val)); \ }) #define csr_set(csr, val) \ ({ if (CSR_ZIMM(val)) \ __asm __volatile("csrsi " #csr ", %0" :: "i" (val)); \ else \ __asm __volatile("csrs " #csr ", %0" :: "r" (val)); \ }) #define csr_clear(csr, val) \ ({ if (CSR_ZIMM(val)) \ __asm __volatile("csrci " #csr ", %0" :: "i" (val)); \ else \ __asm __volatile("csrc " #csr ", %0" :: "r" (val)); \ }) #define csr_read(csr) \ ({ u_long val; \ __asm __volatile("csrr %0, " #csr : "=r" (val)); \ val; \ }) #endif /* !_MACHINE_RISCVREG_H_ */ Index: head/sys/riscv/include/vmparam.h =================================================================== --- head/sys/riscv/include/vmparam.h (revision 303659) +++ head/sys/riscv/include/vmparam.h (revision 303660) @@ -1,244 +1,244 @@ /*- * Copyright (c) 1990 The Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)vmparam.h 5.9 (Berkeley) 5/12/91 * from: FreeBSD: src/sys/i386/include/vmparam.h,v 1.33 2000/03/30 * $FreeBSD$ */ #ifndef _MACHINE_VMPARAM_H_ #define _MACHINE_VMPARAM_H_ /* * Virtual memory related constants, all in bytes */ #ifndef MAXTSIZ #define MAXTSIZ (1*1024*1024*1024) /* max text size */ #endif #ifndef DFLDSIZ #define DFLDSIZ (128*1024*1024) /* initial data size limit */ #endif #ifndef MAXDSIZ #define MAXDSIZ (1*1024*1024*1024) /* max data size */ #endif #ifndef DFLSSIZ #define DFLSSIZ (128*1024*1024) /* initial stack size limit */ #endif #ifndef MAXSSIZ #define MAXSSIZ (1*1024*1024*1024) /* max stack size */ #endif #ifndef SGROWSIZ #define SGROWSIZ (128*1024) /* amount to grow stack */ #endif /* * The physical address space is sparsely populated. */ #define VM_PHYSSEG_SPARSE /* * The number of PHYSSEG entries must be one greater than the number * of phys_avail entries because the phys_avail entry that spans the * largest physical address that is accessible by ISA DMA is split * into two PHYSSEG entries. */ #define VM_PHYSSEG_MAX 64 /* * Create two free page pools: VM_FREEPOOL_DEFAULT is the default pool * from which physical pages are allocated and VM_FREEPOOL_DIRECT is * the pool from which physical pages for small UMA objects are * allocated. */ #define VM_NFREEPOOL 2 #define VM_FREEPOOL_DEFAULT 0 #define VM_FREEPOOL_DIRECT 1 /* * Create two free page lists: VM_FREELIST_DEFAULT is for physical * pages that are above the largest physical address that is * accessible by ISA DMA and VM_FREELIST_ISADMA is for physical pages * that are below that address. */ #define VM_NFREELIST 2 #define VM_FREELIST_DEFAULT 0 #define VM_FREELIST_ISADMA 1 /* * An allocation size of 16MB is supported in order to optimize the * use of the direct map by UMA. Specifically, a cache line contains * at most four TTEs, collectively mapping 16MB of physical memory. * By reducing the number of distinct 16MB "pages" that are used by UMA, * the physical memory allocator reduces the likelihood of both 4MB * page TLB misses and cache misses caused by 4MB page TLB misses. */ #define VM_NFREEORDER 12 /* * Enable superpage reservations: 1 level. */ #ifndef VM_NRESERVLEVEL #define VM_NRESERVLEVEL 1 #endif /* * Level 0 reservations consist of 512 pages. */ #ifndef VM_LEVEL_0_ORDER #define VM_LEVEL_0_ORDER 9 #endif /** * Address space layout. * * RISC-V implements up to a 48 bit virtual address space. The address space is * split into 2 regions at each end of the 64 bit address space, with an * out of range "hole" in the middle. * * We limit the size of the two spaces to 39 bits each. * * Upper region: 0xffffffffffffffff * 0xffffff8000000000 * * Hole: 0xffffff7fffffffff * 0x0000008000000000 * * Lower region: 0x0000007fffffffff * 0x0000000000000000 * * We use the upper region for the kernel, and the lower region for userland. * * We define some interesting address constants: * * VM_MIN_ADDRESS and VM_MAX_ADDRESS define the start and end of the entire * 64 bit address space, mostly just for convenience. * * VM_MIN_KERNEL_ADDRESS and VM_MAX_KERNEL_ADDRESS define the start and end of * mappable kernel virtual address space. * * VM_MIN_USER_ADDRESS and VM_MAX_USER_ADDRESS define the start and end of the * user address space. */ #define VM_MIN_ADDRESS (0x0000000000000000UL) #define VM_MAX_ADDRESS (0xffffffffffffffffUL) /* 32 GiB of kernel addresses */ #define VM_MIN_KERNEL_ADDRESS (0xffffffc000000000UL) #define VM_MAX_KERNEL_ADDRESS (0xffffffc800000000UL) /* Direct Map for 128 GiB of PA: 0x0 - 0x1fffffffff */ #define DMAP_MIN_ADDRESS (0xffffffd000000000UL) #define DMAP_MAX_ADDRESS (0xffffffefffffffffUL) #define DMAP_MIN_PHYSADDR (0x0000000000000000UL) #define DMAP_MAX_PHYSADDR (DMAP_MAX_ADDRESS - DMAP_MIN_ADDRESS) /* True if pa is in the dmap range */ #define PHYS_IN_DMAP(pa) ((pa) >= DMAP_MIN_PHYSADDR && \ (pa) <= DMAP_MAX_PHYSADDR) /* True if va is in the dmap range */ #define VIRT_IN_DMAP(va) ((va) >= DMAP_MIN_ADDRESS && \ (va) <= DMAP_MAX_ADDRESS) #define PHYS_TO_DMAP(pa) \ ({ \ KASSERT(PHYS_IN_DMAP(pa), \ ("%s: PA out of range, PA: 0x%lx", __func__, \ (vm_paddr_t)(pa))); \ (pa) | DMAP_MIN_ADDRESS; \ }) #define DMAP_TO_PHYS(va) \ ({ \ KASSERT(VIRT_IN_DMAP(va), \ ("%s: VA out of range, VA: 0x%lx", __func__, \ (vm_offset_t)(va))); \ (va) & ~DMAP_MIN_ADDRESS; \ }) #define VM_MIN_USER_ADDRESS (0x0000000000000000UL) #define VM_MAX_USER_ADDRESS (0x0000004000000000UL) #define VM_MINUSER_ADDRESS (VM_MIN_USER_ADDRESS) #define VM_MAXUSER_ADDRESS (VM_MAX_USER_ADDRESS) #define KERNBASE (VM_MIN_KERNEL_ADDRESS) #define SHAREDPAGE (VM_MAXUSER_ADDRESS - PAGE_SIZE) #define USRSTACK SHAREDPAGE -#define KERNENTRY (0x200) +#define KERNENTRY (0x80000000) /* * How many physical pages per kmem arena virtual page. */ #ifndef VM_KMEM_SIZE_SCALE #define VM_KMEM_SIZE_SCALE (3) #endif /* * Optional floor (in bytes) on the size of the kmem arena. */ #ifndef VM_KMEM_SIZE_MIN #define VM_KMEM_SIZE_MIN (16 * 1024 * 1024) #endif /* * Optional ceiling (in bytes) on the size of the kmem arena: 60% of the * kernel map. */ #ifndef VM_KMEM_SIZE_MAX #define VM_KMEM_SIZE_MAX ((VM_MAX_KERNEL_ADDRESS - \ VM_MIN_KERNEL_ADDRESS + 1) * 3 / 5) #endif /* * Initial pagein size of beginning of executable file. */ #ifndef VM_INITIAL_PAGEIN #define VM_INITIAL_PAGEIN 16 #endif /* * RISCVTODO * #define UMA_MD_SMALL_ALLOC */ extern u_int tsb_kernel_ldd_phys; extern vm_offset_t vm_max_kernel_address; extern vm_offset_t init_pt_va; #define ZERO_REGION_SIZE (64 * 1024) /* 64KB */ #define DEVMAP_MAX_VADDR VM_MAX_KERNEL_ADDRESS #endif /* !_MACHINE_VMPARAM_H_ */ Index: head/sys/riscv/riscv/exception.S =================================================================== --- head/sys/riscv/riscv/exception.S (revision 303659) +++ head/sys/riscv/riscv/exception.S (revision 303660) @@ -1,611 +1,622 @@ /*- * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include "assym.s" #include #include .macro save_registers el addi sp, sp, -(TF_SIZE) sd ra, (TF_RA)(sp) sd tp, (TF_TP)(sp) .if \el == 0 /* We came from userspace. Load our pcpu */ sd gp, (TF_GP)(sp) ld gp, (TF_SIZE)(sp) .endif sd t0, (TF_T + 0 * 8)(sp) sd t1, (TF_T + 1 * 8)(sp) sd t2, (TF_T + 2 * 8)(sp) sd t3, (TF_T + 3 * 8)(sp) sd t4, (TF_T + 4 * 8)(sp) sd t5, (TF_T + 5 * 8)(sp) sd t6, (TF_T + 6 * 8)(sp) sd s0, (TF_S + 0 * 8)(sp) sd s1, (TF_S + 1 * 8)(sp) sd s2, (TF_S + 2 * 8)(sp) sd s3, (TF_S + 3 * 8)(sp) sd s4, (TF_S + 4 * 8)(sp) sd s5, (TF_S + 5 * 8)(sp) sd s6, (TF_S + 6 * 8)(sp) sd s7, (TF_S + 7 * 8)(sp) sd s8, (TF_S + 8 * 8)(sp) sd s9, (TF_S + 9 * 8)(sp) sd s10, (TF_S + 10 * 8)(sp) sd s11, (TF_S + 11 * 8)(sp) sd a0, (TF_A + 0 * 8)(sp) sd a1, (TF_A + 1 * 8)(sp) sd a2, (TF_A + 2 * 8)(sp) sd a3, (TF_A + 3 * 8)(sp) sd a4, (TF_A + 4 * 8)(sp) sd a5, (TF_A + 5 * 8)(sp) sd a6, (TF_A + 6 * 8)(sp) sd a7, (TF_A + 7 * 8)(sp) #if 0 /* XXX: temporary test: spin if stack is not kernel one */ .if \el == 1 /* kernel */ mv t0, sp srli t0, t0, 63 1: beqz t0, 1b .endif #endif .if \el == 1 /* Store kernel sp */ li t1, TF_SIZE add t0, sp, t1 sd t0, (TF_SP)(sp) .else /* Store user sp */ csrr t0, sscratch sd t0, (TF_SP)(sp) .endif li t0, 0 csrw sscratch, t0 csrr t0, sepc sd t0, (TF_SEPC)(sp) csrr t0, sstatus sd t0, (TF_SSTATUS)(sp) csrr t0, sbadaddr sd t0, (TF_SBADADDR)(sp) csrr t0, scause sd t0, (TF_SCAUSE)(sp) .endm .macro load_registers el ld t0, (TF_SSTATUS)(sp) .if \el == 0 /* Ensure user interrupts will be enabled on eret. */ - ori t0, t0, SSTATUS_PIE + li t1, SSTATUS_SPIE + or t0, t0, t1 .else /* * Disable interrupts for supervisor mode exceptions. * For user mode exceptions we have already done this * in do_ast. */ - li t1, ~SSTATUS_IE + li t1, ~SSTATUS_SIE and t0, t0, t1 .endif csrw sstatus, t0 ld t0, (TF_SEPC)(sp) csrw sepc, t0 .if \el == 0 /* We go to userspace. Load user sp */ ld t0, (TF_SP)(sp) csrw sscratch, t0 /* And store our pcpu */ sd gp, (TF_SIZE)(sp) ld gp, (TF_GP)(sp) .endif ld ra, (TF_RA)(sp) ld tp, (TF_TP)(sp) ld t0, (TF_T + 0 * 8)(sp) ld t1, (TF_T + 1 * 8)(sp) ld t2, (TF_T + 2 * 8)(sp) ld t3, (TF_T + 3 * 8)(sp) ld t4, (TF_T + 4 * 8)(sp) ld t5, (TF_T + 5 * 8)(sp) ld t6, (TF_T + 6 * 8)(sp) ld s0, (TF_S + 0 * 8)(sp) ld s1, (TF_S + 1 * 8)(sp) ld s2, (TF_S + 2 * 8)(sp) ld s3, (TF_S + 3 * 8)(sp) ld s4, (TF_S + 4 * 8)(sp) ld s5, (TF_S + 5 * 8)(sp) ld s6, (TF_S + 6 * 8)(sp) ld s7, (TF_S + 7 * 8)(sp) ld s8, (TF_S + 8 * 8)(sp) ld s9, (TF_S + 9 * 8)(sp) ld s10, (TF_S + 10 * 8)(sp) ld s11, (TF_S + 11 * 8)(sp) ld a0, (TF_A + 0 * 8)(sp) ld a1, (TF_A + 1 * 8)(sp) ld a2, (TF_A + 2 * 8)(sp) ld a3, (TF_A + 3 * 8)(sp) ld a4, (TF_A + 4 * 8)(sp) ld a5, (TF_A + 5 * 8)(sp) ld a6, (TF_A + 6 * 8)(sp) ld a7, (TF_A + 7 * 8)(sp) addi sp, sp, (TF_SIZE) .endm .macro do_ast /* Disable interrupts */ csrr a4, sstatus 1: - csrci sstatus, SSTATUS_IE + csrci sstatus, (SSTATUS_SIE) ld a1, PC_CURTHREAD(gp) lw a2, TD_FLAGS(a1) li a3, (TDF_ASTPENDING|TDF_NEEDRESCHED) and a2, a2, a3 beqz a2, 2f /* Restore interrupts */ - andi a4, a4, SSTATUS_IE + andi a4, a4, (SSTATUS_SIE) csrs sstatus, a4 /* Handle the ast */ mv a0, sp call _C_LABEL(ast) /* Re-check for new ast scheduled */ j 1b 2: .endm +ENTRY(cpu_exception_handler) + csrrw sp, sscratch, sp + beqz sp, 1f + /* User mode detected */ + csrrw sp, sscratch, sp + j cpu_exception_handler_user +1: + /* Supervisor mode detected */ + csrrw sp, sscratch, sp + j cpu_exception_handler_supervisor +END(cpu_exception_handler) + ENTRY(cpu_exception_handler_supervisor) save_registers 1 mv a0, sp call _C_LABEL(do_trap_supervisor) load_registers 1 - eret + sret END(cpu_exception_handler_supervisor) ENTRY(cpu_exception_handler_user) csrrw sp, sscratch, sp save_registers 0 mv a0, sp call _C_LABEL(do_trap_user) do_ast load_registers 0 csrrw sp, sscratch, sp - eret + sret END(cpu_exception_handler_user) /* * Trap handlers */ .text bad_trap: j bad_trap -user_trap: +machine_trap: /* Save state */ csrrw sp, mscratch, sp addi sp, sp, -64 sd t0, (8 * 0)(sp) sd t1, (8 * 1)(sp) sd t2, (8 * 2)(sp) sd t3, (8 * 3)(sp) sd t4, (8 * 4)(sp) sd t5, (8 * 5)(sp) sd a0, (8 * 7)(sp) - la t2, _C_LABEL(cpu_exception_handler_user) - - csrr t0, mcause - bltz t0, machine_interrupt - j exit_mrts - -supervisor_trap: - /* Save state */ - csrrw sp, mscratch, sp - addi sp, sp, -64 - sd t0, (8 * 0)(sp) - sd t1, (8 * 1)(sp) - sd t2, (8 * 2)(sp) - sd t3, (8 * 3)(sp) - sd t4, (8 * 4)(sp) - sd t5, (8 * 5)(sp) - sd a0, (8 * 7)(sp) - - la t2, _C_LABEL(cpu_exception_handler_supervisor) - + csrr t3, mstatus /* Required for debug */ csrr t0, mcause bltz t0, machine_interrupt - li t1, EXCP_SMODE_ENV_CALL + li t1, EXCP_SUPERVISOR_ECALL beq t0, t1, supervisor_call - j exit_mrts +4: + /* NOT REACHED */ + j 4b machine_interrupt: /* Type of interrupt ? */ csrr t0, mcause andi t0, t0, EXCP_MASK - li t1, 0 - beq t1, t0, software_interrupt - li t1, 1 - beq t1, t0, timer_interrupt - li t1, 2 - beq t1, t0, htif_interrupt +#if 0 + /* lowRISC TODO */ li t1, 4 beq t1, t0, io_interrupt /* lowRISC only */ +#endif + li t1, 1 + beq t1, t0, supervisor_software_interrupt + li t1, 3 + beq t1, t0, machine_software_interrupt + li t1, 5 + beq t1, t0, supervisor_timer_interrupt + li t1, 7 + beq t1, t0, machine_timer_interrupt - /* not reached */ + /* NOT REACHED */ 1: j 1b +#if 0 + /* lowRISC TODO */ io_interrupt: /* Disable IO interrupts so we can go to supervisor mode */ csrwi CSR_IO_IRQ, 0 /* Handle the trap in supervisor mode */ j exit_mrts +#endif -software_interrupt: +supervisor_software_interrupt: +1: + /* Nothing here as we are using mideleg feature */ + j 1b + +machine_software_interrupt: + /* Clear IPI */ + li t0, 0x40001000 + csrr t2, mhartid + li t3, 0x1000 + mul t2, t2, t3 + add t0, t0, t2 + li t2, 0 + sd t2, 0(t0) + + /* Clear machine software pending bit */ li t0, MIP_MSIP csrc mip, t0 + + /* Post supervisor software interrupt */ li t0, MIP_SSIP csrs mip, t0 - /* If PRV1 is PRV_U (user) then serve the trap */ - csrr t0, mstatus - li t1, (MSTATUS_PRV_M << MSTATUS_PRV1_SHIFT) - and t0, t0, t1 - beqz t0, 1f - - /* - * If PRV1 is supervisor and interrupts were enabled, - * then serve the trap. - */ - csrr t0, mstatus - li t1, (SR_IE1 | (MSTATUS_PRV_M << MSTATUS_PRV1_SHIFT)) - and t0, t0, t1 - li t1, (SR_IE1 | (MSTATUS_PRV_S << MSTATUS_PRV1_SHIFT)) - beq t0, t1, 1f - j exit +supervisor_timer_interrupt: 1: - /* Handle the trap in supervisor mode */ - j exit_mrts + /* Nothing here as we are using mideleg feature */ + j 1b -timer_interrupt: +machine_timer_interrupt: /* Disable machine timer interrupts */ li t0, MIE_MTIE csrc mie, t0 - /* Clear machine pending */ + /* Clear machine timer interrupt pending */ li t0, MIP_MTIP csrc mip, t0 /* Post supervisor timer interrupt */ li t0, MIP_STIP csrs mip, t0 - /* If PRV1 is PRV_U (user) then serve the trap */ - csrr t0, mstatus - li t1, (MSTATUS_PRV_M << MSTATUS_PRV1_SHIFT) - and t0, t0, t1 - beqz t0, 1f - /* - * If PRV1 is supervisor and interrupts were enabled, - * then serve the trap. + * Check for HTIF interrupts. + * The only interrupt expected here is key press. */ - csrr t0, mstatus - li t1, (SR_IE1 | (MSTATUS_PRV_M << MSTATUS_PRV1_SHIFT)) - and t0, t0, t1 - li t1, (SR_IE1 | (MSTATUS_PRV_S << MSTATUS_PRV1_SHIFT)) - beq t0, t1, 1f + la t0, htif_lock + li t2, 1 + amoswap.d t3, t2, 0(t0) + bnez t3, 5f /* Another operation in progress, give up */ - j exit + /* We have lock */ + la t1, fromhost + ld t5, 0(t1) + beqz t5, 4f -1: - /* Serve a trap in supervisor mode */ - j exit_mrts - -htif_interrupt: -1: - li t5, 0 - csrrw t5, mfromhost, t5 - beqz t5, 3f - - /* Console PUT intr ? */ + /* Console GET intr ? */ mv t1, t5 - li t0, 0x101 + li t0, 0x100 srli t1, t1, 48 - bne t1, t0, 2f - /* Yes */ - la t0, console_intr - li t1, 1 - sd t1, 0(t0) - - /* Check if there is any other pending event */ + beq t1, t0, 2f +1: + /* There is no interrupts except keypress */ j 1b 2: /* Save entry */ la t0, htif_ring - csrr t1, mhartid - li t4, (HTIF_RING_SIZE + 16) - mulw t4, t4, t1 - add t0, t0, t4 li t4, (HTIF_RING_SIZE) add t0, t0, t4 /* t0 == htif_ring_cursor */ ld t1, 0(t0) /* load ptr to cursor */ sd t5, 0(t1) /* put entry */ li t4, 1 sd t4, 8(t1) /* mark used */ ld t4, 16(t1) /* take next */ /* Update cursor */ sd t4, 0(t0) /* Post supervisor software interrupt */ li t0, MIP_SSIP csrs mip, t0 - /* Check if there is any other pending event */ - j 1b - 3: + la t1, fromhost + li t5, 0 + sd t5, 0(t1) + +4: + /* Release lock */ + la t0, htif_lock + li t2, 0 + amoswap.d t3, t2, 0(t0) + +5: j exit supervisor_call: csrr t1, mepc addi t1, t1, 4 /* Next instruction in t1 */ li t4, ECALL_HTIF_CMD beq t5, t4, htif_cmd + li t4, ECALL_HTIF_CMD_REQ + beq t5, t4, htif_cmd_req + li t4, ECALL_HTIF_CMD_RESP + beq t5, t4, htif_cmd_resp li t4, ECALL_HTIF_GET_ENTRY beq t5, t4, htif_get_entry li t4, ECALL_MTIMECMP beq t5, t4, set_mtimecmp - li t4, ECALL_CLEAR_PENDING - beq t5, t4, clear_pending li t4, ECALL_MCPUID_GET beq t5, t4, mcpuid_get li t4, ECALL_MIMPID_GET beq t5, t4, mimpid_get li t4, ECALL_SEND_IPI beq t5, t4, send_ipi li t4, ECALL_CLEAR_IPI beq t5, t4, clear_ipi - li t4, ECALL_HTIF_LOWPUTC - beq t5, t4, htif_lowputc li t4, ECALL_MIE_SET beq t5, t4, mie_set +#if 0 + /* lowRISC TODO */ li t4, ECALL_IO_IRQ_MASK beq t5, t4, io_irq_mask +#endif j exit_next_instr +#if 0 + /* lowRISC TODO */ io_irq_mask: csrw CSR_IO_IRQ, t6 j exit_next_instr +#endif mie_set: csrs mie, t6 j exit_next_instr mcpuid_get: - csrr t6, mcpuid + csrr t6, misa j exit_next_instr mimpid_get: csrr t6, mimpid j exit_next_instr send_ipi: - /* CPU mmio base in t6 */ + /* CPU ipi MMIO register in t6 */ mv t0, t6 - li t2, (CSR_IPI * XLEN) - add t0, t0, t2 /* t0 = CSR_IPI */ li t2, 1 sd t2, 0(t0) j exit_next_instr clear_ipi: /* Do only clear if there are no new entries in HTIF ring */ la t0, htif_ring - csrr t2, mhartid - li t4, (HTIF_RING_SIZE + 16) - mulw t4, t4, t2 - add t0, t0, t4 li t4, (HTIF_RING_SIZE) add t0, t0, t4 /* t0 == ptr to htif_ring_cursor */ ld t2, 8(t0) /* load htif_ring_last */ ld t2, 8(t2) /* load used */ bnez t2, 1f /* Clear supervisor software interrupt pending bit */ li t0, MIP_SSIP csrc mip, t0 1: j exit_next_instr htif_get_entry: /* Get a htif_ring for current core */ la t0, htif_ring - csrr t2, mhartid - li t4, (HTIF_RING_SIZE + 16) - mulw t4, t4, t2 - add t0, t0, t4 li t4, (HTIF_RING_SIZE + 8) add t0, t0, t4 /* t0 == htif_ring_last */ /* Check for new entries */ li t6, 0 /* preset return value */ ld t2, 0(t0) /* load ptr to last */ ld t4, 8(t2) /* get used */ beqz t4, 1f /* No new entries. Exit */ /* Get one */ ld t6, 0(t2) /* get entry */ li t4, 0 sd t4, 8(t2) /* mark free */ sd t4, 0(t2) /* free entry, just in case */ ld t4, 16(t2) /* take next */ sd t4, 0(t0) /* update ptr to last */ 1: /* Exit. Result is stored in t6 */ j exit_next_instr -htif_cmd: +htif_cmd_resp: + la t0, htif_lock + li t2, 1 1: - mv t0, t6 - csrrw t0, mtohost, t0 - bnez t0, 1b + amoswap.d t3, t2, 0(t0) + bnez t3, 1b + + /* We have lock. Read for data */ + la t4, fromhost + ld t6, 0(t4) + beqz t6, 2f + + /* Clear event */ + li t5, 0 + sd t5, 0(t4) + +2: + /* Release lock */ + la t0, htif_lock + li t2, 0 + amoswap.d t3, t2, 0(t0) + j exit_next_instr -htif_lowputc: +htif_cmd_req: + la t0, htif_lock + li t2, 1 1: - mv t0, t6 - csrrw t0, mtohost, t0 - bnez t0, 1b + amoswap.d t3, t2, 0(t0) + bnez t3, 1b + /* We have lock. Store new request */ + la t4, tohost + sd t6, 0(t4) + + /* Release lock */ + la t0, htif_lock + li t2, 0 + amoswap.d t3, t2, 0(t0) + + j exit_next_instr + +htif_cmd: + la t0, htif_lock + li t2, 1 +1: + amoswap.d t3, t2, 0(t0) + bnez t3, 1b + + mv t3, t6 + + /* We have lock. Store new request */ + la t4, tohost + sd t6, 0(t4) 2: - li t4, 0 - csrrw t5, mfromhost, t4 - beqz t5, 2b + /* Poll for result */ + la t4, fromhost + ld t6, 0(t4) + beqz t6, 2b - /* Console PUT intr ? */ - mv t2, t5 - srli t2, t2, 48 - li t3, 0x0101 - beq t2, t3, 3f + /* Check for unexpected event */ + srli t0, t6, 48 + srli t2, t3, 48 + beq t2, t0, 3f - /* Not a console PUT, so save entry */ + /* + * We have something unexpected (e.g. keyboard keypress) + * Save entry. + */ la t0, htif_ring - csrr t2, mhartid - li t4, (HTIF_RING_SIZE + 16) - mulw t4, t4, t2 - add t0, t0, t4 li t4, (HTIF_RING_SIZE) add t0, t0, t4 /* t0 == htif_ring_cursor */ ld t2, 0(t0) /* load ptr to cursor */ - sd t5, 0(t2) /* put entry */ + sd t6, 0(t2) /* put entry */ li t4, 1 sd t4, 8(t2) /* mark used */ ld t4, 16(t2) /* take next */ /* Update cursor */ sd t4, 0(t0) /* Post supervisor software interrupt */ li t0, MIP_SSIP csrs mip, t0 - /* Wait for console intr again */ + /* Clear and look for response again */ + la t2, fromhost + li t5, 0 + sd t5, 0(t2) j 2b 3: + la t2, fromhost + li t5, 0 + sd t5, 0(t2) + + /* Release lock */ + la t0, htif_lock + li t2, 0 + amoswap.d t3, t2, 0(t0) + j exit_next_instr set_mtimecmp: - csrr t2, stime - add t6, t6, t2 - csrw mtimecmp, t6 - /* Enable interrupts */ li t0, (MIE_MTIE | MIE_STIE) csrs mie, t0 j exit_next_instr -clear_pending: - li t0, MIP_STIP - csrc mip, t0 - j exit_next_instr - /* * Trap exit functions */ exit_next_instr: /* Next instruction is in t1 */ csrw mepc, t1 exit: /* Restore state */ ld t0, (8 * 0)(sp) ld t1, (8 * 1)(sp) ld t2, (8 * 2)(sp) ld t3, (8 * 3)(sp) ld t4, (8 * 4)(sp) ld t5, (8 * 5)(sp) ld a0, (8 * 7)(sp) addi sp, sp, 64 csrrw sp, mscratch, sp - eret + mret -/* - * Redirect to supervisor - */ exit_mrts: - /* Setup exception handler */ - li t1, KERNBASE - add t2, t2, t1 - csrw stvec, t2 - - /* Restore state */ - ld t0, (8 * 0)(sp) - ld t1, (8 * 1)(sp) - ld t2, (8 * 2)(sp) - ld t3, (8 * 3)(sp) - ld t4, (8 * 4)(sp) - ld t5, (8 * 5)(sp) - ld a0, (8 * 7)(sp) - addi sp, sp, 64 - csrrw sp, mscratch, sp - - /* Redirect to supervisor */ - mrts + j exit_mrts Index: head/sys/riscv/riscv/genassym.c =================================================================== --- head/sys/riscv/riscv/genassym.c (revision 303659) +++ head/sys/riscv/riscv/genassym.c (revision 303660) @@ -1,99 +1,100 @@ /*- * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include ASSYM(KERNBASE, KERNBASE); +ASSYM(KERNENTRY, KERNENTRY); ASSYM(VM_MAXUSER_ADDRESS, VM_MAXUSER_ADDRESS); ASSYM(VM_MAX_KERNEL_ADDRESS, VM_MAX_KERNEL_ADDRESS); ASSYM(TDF_ASTPENDING, TDF_ASTPENDING); ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED); ASSYM(PCB_ONFAULT, offsetof(struct pcb, pcb_onfault)); ASSYM(PCB_L1ADDR, offsetof(struct pcb, pcb_l1addr)); ASSYM(PCB_SIZE, sizeof(struct pcb)); ASSYM(PCB_RA, offsetof(struct pcb, pcb_ra)); ASSYM(PCB_SP, offsetof(struct pcb, pcb_sp)); ASSYM(PCB_GP, offsetof(struct pcb, pcb_gp)); ASSYM(PCB_TP, offsetof(struct pcb, pcb_tp)); ASSYM(PCB_T, offsetof(struct pcb, pcb_t)); ASSYM(PCB_S, offsetof(struct pcb, pcb_s)); ASSYM(PCB_A, offsetof(struct pcb, pcb_a)); ASSYM(SF_UC, offsetof(struct sigframe, sf_uc)); ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb)); ASSYM(PC_CURTHREAD, offsetof(struct pcpu, pc_curthread)); ASSYM(TD_PCB, offsetof(struct thread, td_pcb)); ASSYM(TD_FLAGS, offsetof(struct thread, td_flags)); ASSYM(TD_PROC, offsetof(struct thread, td_proc)); ASSYM(TD_FRAME, offsetof(struct thread, td_frame)); ASSYM(TD_MD, offsetof(struct thread, td_md)); ASSYM(TD_LOCK, offsetof(struct thread, td_lock)); ASSYM(TF_SIZE, sizeof(struct trapframe)); ASSYM(TF_RA, offsetof(struct trapframe, tf_ra)); ASSYM(TF_SP, offsetof(struct trapframe, tf_sp)); ASSYM(TF_GP, offsetof(struct trapframe, tf_gp)); ASSYM(TF_TP, offsetof(struct trapframe, tf_tp)); ASSYM(TF_T, offsetof(struct trapframe, tf_t)); ASSYM(TF_S, offsetof(struct trapframe, tf_s)); ASSYM(TF_A, offsetof(struct trapframe, tf_a)); ASSYM(TF_SEPC, offsetof(struct trapframe, tf_sepc)); ASSYM(TF_SBADADDR, offsetof(struct trapframe, tf_sbadaddr)); ASSYM(TF_SCAUSE, offsetof(struct trapframe, tf_scause)); ASSYM(TF_SSTATUS, offsetof(struct trapframe, tf_sstatus)); Index: head/sys/riscv/riscv/identcpu.c =================================================================== --- head/sys/riscv/riscv/identcpu.c (revision 303659) +++ head/sys/riscv/riscv/identcpu.c (revision 303660) @@ -1,149 +1,136 @@ /*- - * Copyright (c) 2015 Ruslan Bukin + * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include char machine[] = "riscv"; SYSCTL_STRING(_hw, HW_MACHINE, machine, CTLFLAG_RD, machine, 0, "Machine class"); struct cpu_desc { u_int cpu_impl; u_int cpu_part_num; const char *cpu_impl_name; const char *cpu_part_name; }; struct cpu_desc cpu_desc[MAXCPU]; struct cpu_parts { u_int part_id; const char *part_name; }; #define CPU_PART_NONE { -1, "Unknown Processor" } struct cpu_implementers { u_int impl_id; const char *impl_name; - /* - * Part number is implementation defined - * so each vendor will have its own set of values and names. - */ - const struct cpu_parts *cpu_parts; }; -#define CPU_IMPLEMENTER_NONE { 0, "Unknown Implementer", cpu_parts_none } +#define CPU_IMPLEMENTER_NONE { 0, "Unknown Implementer" } /* - * Per-implementer table of (PartNum, CPU Name) pairs. + * CPU base */ -/* UC Berkeley */ -static const struct cpu_parts cpu_parts_ucb[] = { - { CPU_PART_RV32I, "RV32I" }, - { CPU_PART_RV32E, "RV32E" }, - { CPU_PART_RV64I, "RV64I" }, - { CPU_PART_RV128I, "RV128I" }, +static const struct cpu_parts cpu_parts_std[] = { + { CPU_PART_RV32, "RV32" }, + { CPU_PART_RV64, "RV64" }, + { CPU_PART_RV128, "RV128" }, CPU_PART_NONE, }; -/* Unknown */ -static const struct cpu_parts cpu_parts_none[] = { - CPU_PART_NONE, -}; - /* * Implementers table. */ const struct cpu_implementers cpu_implementers[] = { - { CPU_IMPL_UCB_ROCKET, "UC Berkeley Rocket", cpu_parts_ucb }, + { CPU_IMPL_UCB_ROCKET, "UC Berkeley Rocket" }, CPU_IMPLEMENTER_NONE, }; void identify_cpu(void) { const struct cpu_parts *cpu_partsp; uint32_t part_id; uint32_t impl_id; uint64_t mimpid; - uint64_t mcpuid; + uint64_t misa; u_int cpu; size_t i; cpu_partsp = NULL; mimpid = machine_command(ECALL_MIMPID_GET, 0); - mcpuid = machine_command(ECALL_MCPUID_GET, 0); + misa = machine_command(ECALL_MCPUID_GET, 0); - /* SMPTODO: use mhartid ? */ cpu = PCPU_GET(cpuid); impl_id = CPU_IMPL(mimpid); for (i = 0; i < nitems(cpu_implementers); i++) { if (impl_id == cpu_implementers[i].impl_id || cpu_implementers[i].impl_id == 0) { cpu_desc[cpu].cpu_impl = impl_id; cpu_desc[cpu].cpu_impl_name = cpu_implementers[i].impl_name; - cpu_partsp = cpu_implementers[i].cpu_parts; + cpu_partsp = cpu_parts_std; break; } } - part_id = CPU_PART(mcpuid); + part_id = CPU_PART(misa); for (i = 0; &cpu_partsp[i] != NULL; i++) { if (part_id == cpu_partsp[i].part_id || cpu_partsp[i].part_id == -1) { cpu_desc[cpu].cpu_part_num = part_id; cpu_desc[cpu].cpu_part_name = cpu_partsp[i].part_name; break; } } /* Print details for boot CPU or if we want verbose output */ if (cpu == 0 || bootverbose) { printf("CPU(%d): %s %s\n", cpu, cpu_desc[cpu].cpu_impl_name, cpu_desc[cpu].cpu_part_name); } } Index: head/sys/riscv/riscv/intr_machdep.c =================================================================== --- head/sys/riscv/riscv/intr_machdep.c (revision 303659) +++ head/sys/riscv/riscv/intr_machdep.c (revision 303660) @@ -1,304 +1,315 @@ /*- * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #ifdef SMP #include #endif u_long intrcnt[NIRQS]; size_t sintrcnt = sizeof(intrcnt); char intrnames[NIRQS * (MAXCOMLEN + 1) * 2]; size_t sintrnames = sizeof(intrnames); static struct intr_event *intr_events[NIRQS]; static riscv_intrcnt_t riscv_intr_counters[NIRQS]; static int intrcnt_index; riscv_intrcnt_t riscv_intrcnt_create(const char* name) { riscv_intrcnt_t counter; counter = &intrcnt[intrcnt_index++]; riscv_intrcnt_setname(counter, name); return (counter); } void riscv_intrcnt_setname(riscv_intrcnt_t counter, const char *name) { int i; i = (counter - intrcnt); KASSERT(counter != NULL, ("riscv_intrcnt_setname: NULL counter")); snprintf(intrnames + (MAXCOMLEN + 1) * i, MAXCOMLEN + 1, "%-*s", MAXCOMLEN, name); } static void riscv_mask_irq(void *source) { uintptr_t irq; irq = (uintptr_t)source; switch (irq) { - case IRQ_TIMER: + case IRQ_TIMER_SUPERVISOR: csr_clear(sie, SIE_STIE); break; - case IRQ_SOFTWARE: + case IRQ_SOFTWARE_USER: + csr_clear(sie, SIE_USIE); + case IRQ_SOFTWARE_SUPERVISOR: csr_clear(sie, SIE_SSIE); break; +#if 0 + /* lowRISC TODO */ case IRQ_UART: machine_command(ECALL_IO_IRQ_MASK, 0); break; +#endif default: panic("Unknown irq %d\n", irq); } } static void riscv_unmask_irq(void *source) { uintptr_t irq; irq = (uintptr_t)source; switch (irq) { - case IRQ_TIMER: + case IRQ_TIMER_SUPERVISOR: csr_set(sie, SIE_STIE); break; - case IRQ_SOFTWARE: + case IRQ_SOFTWARE_USER: + csr_set(sie, SIE_USIE); + break; + case IRQ_SOFTWARE_SUPERVISOR: csr_set(sie, SIE_SSIE); break; +#if 0 + /* lowRISC TODO */ case IRQ_UART: machine_command(ECALL_IO_IRQ_MASK, 1); break; +#endif default: panic("Unknown irq %d\n", irq); } } void riscv_init_interrupts(void) { char name[MAXCOMLEN + 1]; int i; for (i = 0; i < NIRQS; i++) { snprintf(name, MAXCOMLEN + 1, "int%d:", i); riscv_intr_counters[i] = riscv_intrcnt_create(name); } } int riscv_setup_intr(const char *name, driver_filter_t *filt, void (*handler)(void*), void *arg, int irq, int flags, void **cookiep) { struct intr_event *event; int error; if (irq < 0 || irq >= NIRQS) panic("%s: unknown intr %d", __func__, irq); event = intr_events[irq]; if (event == NULL) { error = intr_event_create(&event, (void *)(uintptr_t)irq, 0, irq, riscv_mask_irq, riscv_unmask_irq, NULL, NULL, "int%d", irq); if (error) return (error); intr_events[irq] = event; riscv_unmask_irq((void*)(uintptr_t)irq); } error = intr_event_add_handler(event, name, filt, handler, arg, intr_priority(flags), flags, cookiep); if (error) { printf("Failed to setup intr: %d\n", irq); return (error); } riscv_intrcnt_setname(riscv_intr_counters[irq], event->ie_fullname); return (0); } int riscv_teardown_intr(void *ih) { /* TODO */ return (0); } int riscv_config_intr(u_int irq, enum intr_trigger trig, enum intr_polarity pol) { /* There is no configuration for interrupts */ return (0); } void riscv_cpu_intr(struct trapframe *frame) { struct intr_event *event; int active_irq; critical_enter(); KASSERT(frame->tf_scause & EXCP_INTR, ("riscv_cpu_intr: wrong frame passed")); active_irq = (frame->tf_scause & EXCP_MASK); switch (active_irq) { +#if 0 + /* lowRISC TODO */ case IRQ_UART: - case IRQ_SOFTWARE: - case IRQ_TIMER: +#endif + case IRQ_SOFTWARE_USER: + case IRQ_SOFTWARE_SUPERVISOR: + case IRQ_TIMER_SUPERVISOR: event = intr_events[active_irq]; /* Update counters */ atomic_add_long(riscv_intr_counters[active_irq], 1); PCPU_INC(cnt.v_intr); break; - case IRQ_HTIF: - /* HTIF interrupts are only handled in machine mode */ - panic("%s: HTIF interrupt", __func__); - break; default: event = NULL; } if (!event || TAILQ_EMPTY(&event->ie_handlers) || (intr_event_handle(event, frame) != 0)) printf("stray interrupt %d\n", active_irq); critical_exit(); } #ifdef SMP void riscv_setup_ipihandler(driver_filter_t *filt) { - riscv_setup_intr("ipi", filt, NULL, NULL, IRQ_SOFTWARE, + riscv_setup_intr("ipi", filt, NULL, NULL, IRQ_SOFTWARE_SUPERVISOR, INTR_TYPE_MISC, NULL); } void riscv_unmask_ipi(void) { csr_set(sie, SIE_SSIE); } /* Sending IPI */ static void ipi_send(struct pcpu *pc, int ipi) { CTR3(KTR_SMP, "%s: cpu=%d, ipi=%x", __func__, pc->pc_cpuid, ipi); atomic_set_32(&pc->pc_pending_ipis, ipi); machine_command(ECALL_SEND_IPI, pc->pc_reg); CTR1(KTR_SMP, "%s: sent", __func__); } void ipi_all_but_self(u_int ipi) { cpuset_t other_cpus; other_cpus = all_cpus; CPU_CLR(PCPU_GET(cpuid), &other_cpus); CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); ipi_selected(other_cpus, ipi); } void ipi_cpu(int cpu, u_int ipi) { cpuset_t cpus; CPU_ZERO(&cpus); CPU_SET(cpu, &cpus); CTR3(KTR_SMP, "%s: cpu: %d, ipi: %x\n", __func__, cpu, ipi); ipi_send(cpuid_to_pcpu[cpu], ipi); } void ipi_selected(cpuset_t cpus, u_int ipi) { struct pcpu *pc; CTR1(KTR_SMP, "ipi_selected: ipi: %x", ipi); STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { if (CPU_ISSET(pc->pc_cpuid, &cpus)) { CTR3(KTR_SMP, "%s: pc: %p, ipi: %x\n", __func__, pc, ipi); ipi_send(pc, ipi); } } } #endif Index: head/sys/riscv/riscv/locore.S =================================================================== --- head/sys/riscv/riscv/locore.S (revision 303659) +++ head/sys/riscv/riscv/locore.S (revision 303660) @@ -1,382 +1,392 @@ /*- * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #include "assym.s" #include #include #include #include #include #include #define HTIF_RING_NENTRIES (512) #define HTIF_RING_ENTRY_SZ (24) #define HTIF_RING_SIZE (HTIF_RING_ENTRY_SZ * HTIF_RING_NENTRIES) #define HW_STACK_SIZE (96) /* - * Event queue for each CPU core: + * Event queue: * * struct htif_ring { * uint64_t data; * uint64_t used; * uint64_t next; * } htif_ring[HTIF_RING_NENTRIES]; * uint64_t htif_ring_cursor; * uint64_t htif_ring_last; */ .macro build_ring la t0, htif_ring -#ifdef SMP - csrr a0, mhartid - li s0, (HTIF_RING_SIZE + 16) - mulw s0, a0, s0 - add t0, t0, s0 -#endif li t1, 0 sd t1, 0(t0) /* zero data */ sd t1, 8(t0) /* zero used */ mv t2, t0 mv t3, t0 li t5, (HTIF_RING_SIZE) li t6, 0 add t4, t0, t5 1: addi t3, t3, HTIF_RING_ENTRY_SZ /* pointer to next */ beq t3, t4, 2f /* finish */ sd t3, 16(t2) /* store pointer */ addi t2, t2, HTIF_RING_ENTRY_SZ /* next entry */ addi t6, t6, 1 /* counter */ j 1b 2: addi t3, t3, -HTIF_RING_ENTRY_SZ sd t0, 16(t3) /* last -> first */ li t2, (HTIF_RING_SIZE) add s0, t0, t2 sd t0, 0(s0) /* cursor */ sd t0, 8(s0) /* last */ /* finish building ring */ .endm .globl kernbase .set kernbase, KERNBASE /* Trap entries */ .text mentry: - /* User mode entry point (mtvec + 0x000) */ - .align 6 - j user_trap + /* Vectors */ + j _start /* reset */ + j bad_trap /* NMI (non-maskable interrupt) */ + j machine_trap - /* Supervisor mode entry point (mtvec + 0x040) */ - .align 6 - j supervisor_trap - - /* Hypervisor mode entry point (mtvec + 0x080) */ - .align 6 - j bad_trap - - /* Machine mode entry point (mtvec + 0x0C0) */ - .align 6 - j bad_trap - /* Reset vector */ .text - .align 8 .globl _start _start: + /* Setup machine trap vector */ + la t0, machine_trap + csrw mtvec, t0 + + /* Delegate interrupts to supervisor mode */ + li t0, (MIP_SSIP | MIP_STIP | MIP_SEIP) + csrw mideleg, t0 + + /* Delegate exceptions to supervisor mode */ + li t0, (1 << EXCP_MISALIGNED_FETCH) | \ + (1 << EXCP_FAULT_FETCH) | \ + (1 << EXCP_ILLEGAL_INSTRUCTION) | \ + (1 << EXCP_FAULT_LOAD) | \ + (1 << EXCP_FAULT_STORE) | \ + (1 << EXCP_BREAKPOINT) | \ + (1 << EXCP_USER_ECALL) + csrw medeleg, t0 + + la t0, cpu_exception_handler + li t1, KERNBASE + add t0, t0, t1 + csrw stvec, t0 + /* Direct secondary cores to mpentry */ csrr a0, mhartid bnez a0, mpentry + li t1, 0 + la t0, tohost + sd t1, 0(t0) + la t0, fromhost + sd t1, 0(t0) + /* Build event queue for current core */ build_ring /* Setup machine-mode stack for CPU 0 */ la t0, hardstack_end csrw mscratch, t0 li t0, 0 csrw sscratch, t0 li s10, PAGE_SIZE li s9, (PAGE_SIZE * KSTACK_PAGES) /* Page tables */ /* Create an L1 page for early devmap */ la s1, pagetable_l1 la s2, pagetable_l2_devmap /* Link to next level PN */ srli s2, s2, PAGE_SHIFT li a5, (VM_MAX_KERNEL_ADDRESS - L2_SIZE) srli a5, a5, L1_SHIFT /* >> L1_SHIFT */ andi a5, a5, 0x1ff /* & 0x1ff */ - li t4, (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S)) + li t4, PTE_V slli t5, s2, PTE_PPN0_S /* (s2 << PTE_PPN0_S) */ or t6, t4, t5 /* Store single level1 PTE entry to position */ li a6, PTE_SIZE mulw a5, a5, a6 add t0, s1, a5 sd t6, (t0) /* Add single Level 1 entry for kernel */ la s1, pagetable_l1 la s2, pagetable_l2 /* Link to next level PN */ srli s2, s2, PAGE_SHIFT - li a5, KERNBASE + li a5, (KERNBASE + KERNENTRY) srli a5, a5, L1_SHIFT /* >> L1_SHIFT */ andi a5, a5, 0x1ff /* & 0x1ff */ - li t4, (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S)) + li t4, PTE_V slli t5, s2, PTE_PPN0_S /* (s2 << PTE_PPN0_S) */ or t6, t4, t5 /* Store single level1 PTE entry to position */ li a6, PTE_SIZE mulw a5, a5, a6 add t0, s1, a5 sd t6, (t0) /* Level 2 superpages (512 x 2MiB) */ la s1, pagetable_l2 - li t3, 512 /* Build 512 entries */ - li t4, 0 /* Counter */ + li t4, KERNENTRY + srli t4, t4, 21 /* Div by 2 MiB */ + li t2, 512 /* Build 512 entries */ + add t3, t4, t2 li t5, 0 2: - li t0, (PTE_VALID | (PTE_TYPE_SRWX << PTE_TYPE_S)) + li t0, (PTE_V | PTE_RWX) slli t2, t4, PTE_PPN1_S /* << PTE_PPN1_S */ or t5, t0, t2 sd t5, (s1) /* Store PTE entry to position */ addi s1, s1, PTE_SIZE addi t4, t4, 1 bltu t4, t3, 2b /* Set page tables base register */ la s1, pagetable_l1 + srli s1, s1, PAGE_SHIFT csrw sptbr, s1 /* Page tables END */ /* Enter supervisor mode */ li s0, ((MSTATUS_VM_SV39 << MSTATUS_VM_SHIFT) | \ - (MSTATUS_PRV_M << MSTATUS_PRV_SHIFT) | \ - (MSTATUS_PRV_S << MSTATUS_PRV1_SHIFT) | \ - (MSTATUS_PRV_U << MSTATUS_PRV2_SHIFT)); + (MSTATUS_PRV_S << MSTATUS_MPP_SHIFT)); csrw mstatus, s0 /* * Enable machine-mode software interrupts * so we can deliver IPI to this core. */ li t0, MIE_MSIE csrs mie, t0 /* Exit from machine mode */ la t0, .Lmmu_on li s11, KERNBASE add t0, t0, s11 csrw mepc, t0 - eret + mret .Lmmu_on: /* Initialize stack pointer */ la s3, initstack_end mv sp, s3 addi sp, sp, -PCB_SIZE /* Clear BSS */ la a0, _C_LABEL(__bss_start) la s1, _C_LABEL(_end) 1: sd zero, 0(a0) addi a0, a0, 8 bltu a0, s1, 1b /* Fill riscv_bootparams */ addi sp, sp, -16 la t0, pagetable_l1 sd t0, 0(sp) /* kern_l1pt */ la t0, initstack_end sd t0, 8(sp) /* kern_stack */ mv a0, sp call _C_LABEL(initriscv) /* Off we go */ call _C_LABEL(mi_startup) .align 4 initstack: .space (PAGE_SIZE * KSTACK_PAGES) initstack_end: hardstack: .space (HW_STACK_SIZE * MAXCPU) hardstack_end: .globl htif_ring htif_ring: - .space ((HTIF_RING_SIZE + 16) * MAXCPU) - - .globl console_intr -console_intr: + .space (HTIF_RING_SIZE + 16) +htif_lock: .space (8) +tohost: + .space (8) +fromhost: + .space (8) ENTRY(sigcode) mv a0, sp addi a0, a0, SF_UC 1: li t0, SYS_sigreturn ecall /* sigreturn failed, exit */ li t0, SYS_exit ecall j 1b END(sigcode) /* This may be copied to the stack, keep it 16-byte aligned */ .align 3 esigcode: .data .align 3 .global szsigcode szsigcode: .quad esigcode - sigcode .align 12 pagetable_l1: .space PAGE_SIZE pagetable_l2: .space PAGE_SIZE pagetable_l2_devmap: .space PAGE_SIZE .globl init_pt_va init_pt_va: .quad pagetable_l2 /* XXX: Keep page tables VA */ #ifndef SMP ENTRY(mpentry) 1: wfi j 1b END(mpentry) #else /* * mpentry(unsigned long) * * Called by a core when it is being brought online. * The data in x0 is passed straight to init_secondary. */ ENTRY(mpentry) /* * Calculate the offset to __riscv_boot_ap * for current core, cpuid in a0. */ li t1, 4 mulw t1, t1, a0 /* Get pointer */ la t0, __riscv_boot_ap add t0, t0, t1 1: /* Wait the kernel to be ready */ lw t1, 0(t0) beqz t1, 1b - /* Build event queue ring for this core */ - build_ring - /* Set page tables base register */ la t0, pagetable_l1 + srli t0, t0, PAGE_SHIFT csrw sptbr, t0 /* Configure mstatus */ li s0, ((MSTATUS_VM_SV39 << MSTATUS_VM_SHIFT) | \ - (MSTATUS_PRV_M << MSTATUS_PRV_SHIFT) | \ - (MSTATUS_PRV_S << MSTATUS_PRV1_SHIFT) | \ - (MSTATUS_PRV_U << MSTATUS_PRV2_SHIFT)); + (MSTATUS_PRV_S << MSTATUS_MPP_SHIFT)); csrw mstatus, s0 /* Setup stack for machine mode exceptions */ la t0, hardstack_end li t1, HW_STACK_SIZE mulw t1, t1, a0 sub t0, t0, t1 csrw mscratch, t0 li t0, 0 csrw sscratch, t0 /* * Enable machine-mode software interrupts * so we can deliver IPI to this core. */ li t0, MIE_MSIE csrs mie, t0 /* * Exit from machine mode and go to * the virtual address space. */ la t0, mp_virtdone li s11, KERNBASE add t0, t0, s11 csrw mepc, t0 - eret + mret mp_virtdone: /* We are now in virtual address space */ /* Setup stack pointer */ la t0, secondary_stacks li t1, (PAGE_SIZE * KSTACK_PAGES) mulw t1, t1, a0 add sp, t0, t1 call init_secondary END(mpentry) #endif #include "exception.S" Index: head/sys/riscv/riscv/machdep.c =================================================================== --- head/sys/riscv/riscv/machdep.c (revision 303659) +++ head/sys/riscv/riscv/machdep.c (revision 303660) @@ -1,797 +1,798 @@ /*- * Copyright (c) 2014 Andrew Turner * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include "opt_platform.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef VFP #include #endif #ifdef FDT #include #include #endif struct pcpu __pcpu[MAXCPU]; static struct trapframe proc0_tf; vm_paddr_t phys_avail[PHYS_AVAIL_SIZE + 2]; vm_paddr_t dump_avail[PHYS_AVAIL_SIZE + 2]; int early_boot = 1; int cold = 1; long realmem = 0; long Maxmem = 0; #define PHYSMAP_SIZE (2 * (VM_PHYSSEG_MAX - 1)) vm_paddr_t physmap[PHYSMAP_SIZE]; u_int physmap_idx; struct kva_md_info kmi; int64_t dcache_line_size; /* The minimum D cache line size */ int64_t icache_line_size; /* The minimum I cache line size */ int64_t idcache_line_size; /* The minimum cache line size */ extern int *end; extern int *initstack_end; struct pcpu *pcpup; uintptr_t mcall_trap(uintptr_t mcause, uintptr_t* regs); uintptr_t mcall_trap(uintptr_t mcause, uintptr_t* regs) { return (0); } static void cpu_startup(void *dummy) { identify_cpu(); vm_ksubmap_init(&kmi); bufinit(); vm_pager_bufferinit(); } SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); int cpu_idle_wakeup(int cpu) { return (0); } void bzero(void *buf, size_t len) { uint8_t *p; p = buf; while(len-- > 0) *p++ = 0; } int fill_regs(struct thread *td, struct reg *regs) { struct trapframe *frame; frame = td->td_frame; regs->sepc = frame->tf_sepc; regs->sstatus = frame->tf_sstatus; regs->ra = frame->tf_ra; regs->sp = frame->tf_sp; regs->gp = frame->tf_gp; regs->tp = frame->tf_tp; memcpy(regs->t, frame->tf_t, sizeof(regs->t)); memcpy(regs->s, frame->tf_s, sizeof(regs->s)); memcpy(regs->a, frame->tf_a, sizeof(regs->a)); return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *frame; frame = td->td_frame; frame->tf_sepc = regs->sepc; frame->tf_sstatus = regs->sstatus; frame->tf_ra = regs->ra; frame->tf_sp = regs->sp; frame->tf_gp = regs->gp; frame->tf_tp = regs->tp; memcpy(frame->tf_t, regs->t, sizeof(frame->tf_t)); memcpy(frame->tf_s, regs->s, sizeof(frame->tf_s)); memcpy(frame->tf_a, regs->a, sizeof(frame->tf_a)); return (0); } int fill_fpregs(struct thread *td, struct fpreg *regs) { /* TODO */ bzero(regs, sizeof(*regs)); return (0); } int set_fpregs(struct thread *td, struct fpreg *regs) { /* TODO */ return (0); } int fill_dbregs(struct thread *td, struct dbreg *regs) { panic("fill_dbregs"); } int set_dbregs(struct thread *td, struct dbreg *regs) { panic("set_dbregs"); } int ptrace_set_pc(struct thread *td, u_long addr) { panic("ptrace_set_pc"); return (0); } int ptrace_single_step(struct thread *td) { /* TODO; */ return (0); } int ptrace_clear_single_step(struct thread *td) { /* TODO; */ return (0); } void exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *tf; tf = td->td_frame; memset(tf, 0, sizeof(struct trapframe)); /* * We need to set a0 for init as it doesn't call * cpu_set_syscall_retval to copy the value. We also * need to set td_retval for the cases where we do. */ tf->tf_a[0] = td->td_retval[0] = stack; tf->tf_sp = STACKALIGN(stack); tf->tf_ra = imgp->entry_addr; tf->tf_sepc = imgp->entry_addr; } /* Sanity check these are the same size, they will be memcpy'd to and fro */ CTASSERT(sizeof(((struct trapframe *)0)->tf_a) == sizeof((struct gpregs *)0)->gp_a); CTASSERT(sizeof(((struct trapframe *)0)->tf_s) == sizeof((struct gpregs *)0)->gp_s); CTASSERT(sizeof(((struct trapframe *)0)->tf_t) == sizeof((struct gpregs *)0)->gp_t); CTASSERT(sizeof(((struct trapframe *)0)->tf_a) == sizeof((struct reg *)0)->a); CTASSERT(sizeof(((struct trapframe *)0)->tf_s) == sizeof((struct reg *)0)->s); CTASSERT(sizeof(((struct trapframe *)0)->tf_t) == sizeof((struct reg *)0)->t); int get_mcontext(struct thread *td, mcontext_t *mcp, int clear_ret) { struct trapframe *tf = td->td_frame; memcpy(mcp->mc_gpregs.gp_t, tf->tf_t, sizeof(mcp->mc_gpregs.gp_t)); memcpy(mcp->mc_gpregs.gp_s, tf->tf_s, sizeof(mcp->mc_gpregs.gp_s)); memcpy(mcp->mc_gpregs.gp_a, tf->tf_a, sizeof(mcp->mc_gpregs.gp_a)); if (clear_ret & GET_MC_CLEAR_RET) { mcp->mc_gpregs.gp_a[0] = 0; mcp->mc_gpregs.gp_t[0] = 0; /* clear syscall error */ } mcp->mc_gpregs.gp_ra = tf->tf_ra; mcp->mc_gpregs.gp_sp = tf->tf_sp; mcp->mc_gpregs.gp_gp = tf->tf_gp; mcp->mc_gpregs.gp_tp = tf->tf_tp; mcp->mc_gpregs.gp_sepc = tf->tf_sepc; mcp->mc_gpregs.gp_sstatus = tf->tf_sstatus; return (0); } int set_mcontext(struct thread *td, mcontext_t *mcp) { struct trapframe *tf; tf = td->td_frame; memcpy(tf->tf_t, mcp->mc_gpregs.gp_t, sizeof(tf->tf_t)); memcpy(tf->tf_s, mcp->mc_gpregs.gp_s, sizeof(tf->tf_s)); memcpy(tf->tf_a, mcp->mc_gpregs.gp_a, sizeof(tf->tf_a)); tf->tf_ra = mcp->mc_gpregs.gp_ra; tf->tf_sp = mcp->mc_gpregs.gp_sp; tf->tf_gp = mcp->mc_gpregs.gp_gp; tf->tf_tp = mcp->mc_gpregs.gp_tp; tf->tf_sepc = mcp->mc_gpregs.gp_sepc; tf->tf_sstatus = mcp->mc_gpregs.gp_sstatus; return (0); } static void get_fpcontext(struct thread *td, mcontext_t *mcp) { /* TODO */ } static void set_fpcontext(struct thread *td, mcontext_t *mcp) { /* TODO */ } void cpu_idle(int busy) { spinlock_enter(); if (!busy) cpu_idleclock(); if (!sched_runnable()) __asm __volatile( "fence \n" "wfi \n"); if (!busy) cpu_activeclock(); spinlock_exit(); } void cpu_halt(void) { panic("cpu_halt"); } /* * Flush the D-cache for non-DMA I/O so that the I-cache can * be made coherent later. */ void cpu_flush_dcache(void *ptr, size_t len) { /* TBD */ } /* Get current clock frequency for the given CPU ID. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { panic("cpu_est_clockrate"); } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { } void spinlock_enter(void) { struct thread *td; td = curthread; if (td->td_md.md_spinlock_count == 0) { td->td_md.md_spinlock_count = 1; td->td_md.md_saved_sstatus_ie = intr_disable(); } else td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; register_t sstatus_ie; td = curthread; critical_exit(); sstatus_ie = td->td_md.md_saved_sstatus_ie; td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) intr_restore(sstatus_ie); } #ifndef _SYS_SYSPROTO_H_ struct sigreturn_args { ucontext_t *ucp; }; #endif int sys_sigreturn(struct thread *td, struct sigreturn_args *uap) { uint64_t sstatus; ucontext_t uc; int error; if (uap == NULL) return (EFAULT); if (copyin(uap->sigcntxp, &uc, sizeof(uc))) return (EFAULT); /* * Make sure the processor mode has not been tampered with and * interrupts have not been disabled. + * Supervisor interrupts in user mode are always enabled. */ sstatus = uc.uc_mcontext.mc_gpregs.gp_sstatus; - if ((sstatus & SSTATUS_PS) != 0 || - (sstatus & SSTATUS_PIE) == 0) + if ((sstatus & SSTATUS_SPP) != 0) return (EINVAL); error = set_mcontext(td, &uc.uc_mcontext); if (error != 0) return (error); set_fpcontext(td, &uc.uc_mcontext); /* Restore signal mask. */ kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); return (EJUSTRETURN); } /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { memcpy(pcb->pcb_t, tf->tf_t, sizeof(tf->tf_t)); memcpy(pcb->pcb_s, tf->tf_s, sizeof(tf->tf_s)); memcpy(pcb->pcb_a, tf->tf_a, sizeof(tf->tf_a)); pcb->pcb_ra = tf->tf_ra; pcb->pcb_sp = tf->tf_sp; pcb->pcb_gp = tf->tf_gp; pcb->pcb_tp = tf->tf_tp; pcb->pcb_sepc = tf->tf_sepc; } void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct sigframe *fp, frame; struct sysentvec *sysent; struct trapframe *tf; struct sigacts *psp; struct thread *td; struct proc *p; int onstack; int code; int sig; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; code = ksi->ksi_code; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); tf = td->td_frame; onstack = sigonstack(tf->tf_sp); CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm, catcher, sig); /* Allocate and validate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !onstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct sigframe *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size); } else { fp = (struct sigframe *)td->td_frame->tf_sp; } /* Make room, keeping the stack aligned */ fp--; fp = (struct sigframe *)STACKALIGN(fp); /* Fill in the frame to copy out */ get_mcontext(td, &frame.sf_uc.uc_mcontext, 0); get_fpcontext(td, &frame.sf_uc.uc_mcontext); frame.sf_si = ksi->ksi_info; frame.sf_uc.uc_sigmask = *mask; frame.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((onstack) ? SS_ONSTACK : 0) : SS_DISABLE; frame.sf_uc.uc_stack = td->td_sigstk; mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(td->td_proc); /* Copy the sigframe out to the user's stack. */ if (copyout(&frame, fp, sizeof(*fp)) != 0) { /* Process has trashed its stack. Kill it. */ CTR2(KTR_SIG, "sendsig: sigexit td=%p fp=%p", td, fp); PROC_LOCK(p); sigexit(td, SIGILL); } tf->tf_a[0] = sig; tf->tf_a[1] = (register_t)&fp->sf_si; tf->tf_a[2] = (register_t)&fp->sf_uc; tf->tf_sepc = (register_t)catcher; tf->tf_sp = (register_t)fp; sysent = p->p_sysent; if (sysent->sv_sigcode_base != 0) tf->tf_ra = (register_t)sysent->sv_sigcode_base; else tf->tf_ra = (register_t)(sysent->sv_psstrings - *(sysent->sv_szsigcode)); CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->tf_sepc, tf->tf_sp); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } static void init_proc0(vm_offset_t kstack) { pcpup = &__pcpu[0]; proc_linkup0(&proc0, &thread0); thread0.td_kstack = kstack; thread0.td_pcb = (struct pcb *)(thread0.td_kstack) - 1; thread0.td_frame = &proc0_tf; pcpup->pc_curpcb = thread0.td_pcb; } static int add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap, u_int *physmap_idxp) { u_int i, insert_idx, _physmap_idx; _physmap_idx = *physmap_idxp; if (length == 0) return (1); /* * Find insertion point while checking for overlap. Start off by * assuming the new entry will be added to the end. */ insert_idx = _physmap_idx; for (i = 0; i <= _physmap_idx; i += 2) { if (base < physmap[i + 1]) { if (base + length <= physmap[i]) { insert_idx = i; break; } if (boothowto & RB_VERBOSE) printf( "Overlapping memory regions, ignoring second region\n"); return (1); } } /* See if we can prepend to the next entry. */ if (insert_idx <= _physmap_idx && base + length == physmap[insert_idx]) { physmap[insert_idx] = base; return (1); } /* See if we can append to the previous entry. */ if (insert_idx > 0 && base == physmap[insert_idx - 1]) { physmap[insert_idx - 1] += length; return (1); } _physmap_idx += 2; *physmap_idxp = _physmap_idx; if (_physmap_idx == PHYSMAP_SIZE) { printf( "Too many segments in the physical address map, giving up\n"); return (0); } /* * Move the last 'N' entries down to make room for the new * entry if needed. */ for (i = _physmap_idx; i > insert_idx; i -= 2) { physmap[i] = physmap[i - 2]; physmap[i + 1] = physmap[i - 1]; } /* Insert the new entry. */ physmap[insert_idx] = base; physmap[insert_idx + 1] = base + length; printf("physmap[%d] = 0x%016lx\n", insert_idx, base); printf("physmap[%d] = 0x%016lx\n", insert_idx + 1, base + length); return (1); } #ifdef FDT static void try_load_dtb(caddr_t kmdp) { vm_offset_t dtbp; dtbp = (vm_offset_t)&fdt_static_dtb; if (dtbp == (vm_offset_t)NULL) { printf("ERROR loading DTB\n"); return; } if (OF_install(OFW_FDT, 0) == FALSE) panic("Cannot install FDT"); if (OF_init((void *)dtbp) != 0) panic("OF_init failed with the found device tree"); } #endif static void cache_setup(void) { /* TODO */ } /* * Fake up a boot descriptor table. * RISCVTODO: This needs to be done via loader (when it's available). */ vm_offset_t fake_preload_metadata(struct riscv_bootparams *rvbp __unused) { #ifdef DDB vm_offset_t zstart = 0, zend = 0; #endif vm_offset_t lastaddr; int i = 0; static uint32_t fake_preload[35]; fake_preload[i++] = MODINFO_NAME; fake_preload[i++] = strlen("kernel") + 1; strcpy((char*)&fake_preload[i++], "kernel"); i += 1; fake_preload[i++] = MODINFO_TYPE; fake_preload[i++] = strlen("elf64 kernel") + 1; strcpy((char*)&fake_preload[i++], "elf64 kernel"); i += 3; fake_preload[i++] = MODINFO_ADDR; fake_preload[i++] = sizeof(vm_offset_t); fake_preload[i++] = (uint64_t)(KERNBASE + KERNENTRY); i += 1; fake_preload[i++] = MODINFO_SIZE; fake_preload[i++] = sizeof(uint64_t); printf("end is 0x%016lx\n", (uint64_t)&end); fake_preload[i++] = (uint64_t)&end - (uint64_t)(KERNBASE + KERNENTRY); i += 1; #ifdef DDB #if 0 /* RISCVTODO */ if (*(uint32_t *)KERNVIRTADDR == MAGIC_TRAMP_NUMBER) { fake_preload[i++] = MODINFO_METADATA|MODINFOMD_SSYM; fake_preload[i++] = sizeof(vm_offset_t); fake_preload[i++] = *(uint32_t *)(KERNVIRTADDR + 4); fake_preload[i++] = MODINFO_METADATA|MODINFOMD_ESYM; fake_preload[i++] = sizeof(vm_offset_t); fake_preload[i++] = *(uint32_t *)(KERNVIRTADDR + 8); lastaddr = *(uint32_t *)(KERNVIRTADDR + 8); zend = lastaddr; zstart = *(uint32_t *)(KERNVIRTADDR + 4); db_fetch_ksymtab(zstart, zend); } else #endif #endif lastaddr = (vm_offset_t)&end; fake_preload[i++] = 0; fake_preload[i] = 0; preload_metadata = (void *)fake_preload; return (lastaddr); } void initriscv(struct riscv_bootparams *rvbp) { struct mem_region mem_regions[FDT_MEM_REGIONS]; vm_offset_t lastaddr; int mem_regions_sz; vm_size_t kernlen; caddr_t kmdp; int i; /* Set the module data location */ lastaddr = fake_preload_metadata(rvbp); /* Find the kernel address */ kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf64 kernel"); - boothowto = 0; + boothowto = RB_VERBOSE | RB_SINGLE; + boothowto = RB_VERBOSE; kern_envp = NULL; #ifdef FDT try_load_dtb(kmdp); #endif /* Load the physical memory ranges */ physmap_idx = 0; /* Grab physical memory regions information from device tree. */ if (fdt_get_mem_regions(mem_regions, &mem_regions_sz, NULL) != 0) panic("Cannot get physical memory regions"); for (i = 0; i < mem_regions_sz; i++) add_physmap_entry(mem_regions[i].mr_start, mem_regions[i].mr_size, physmap, &physmap_idx); /* Set the pcpu data, this is needed by pmap_bootstrap */ pcpup = &__pcpu[0]; pcpu_init(pcpup, 0, sizeof(struct pcpu)); /* Set the pcpu pointer */ __asm __volatile("mv gp, %0" :: "r"(pcpup)); PCPU_SET(curthread, &thread0); /* Do basic tuning, hz etc */ init_param1(); cache_setup(); - /* Bootstrap enough of pmap to enter the kernel proper */ + /* Bootstrap enough of pmap to enter the kernel proper */ kernlen = (lastaddr - KERNBASE); pmap_bootstrap(rvbp->kern_l1pt, KERNENTRY, kernlen); cninit(); init_proc0(rvbp->kern_stack); /* set page table base register for thread0 */ thread0.td_pcb->pcb_l1addr = (rvbp->kern_l1pt - KERNBASE); msgbufinit(msgbufp, msgbufsize); mutex_init(); init_param2(physmem); kdb_init(); riscv_init_interrupts(); early_boot = 0; } Index: head/sys/riscv/riscv/pmap.c =================================================================== --- head/sys/riscv/riscv/pmap.c (revision 303659) +++ head/sys/riscv/riscv/pmap.c (revision 303660) @@ -1,3278 +1,3282 @@ /*- * Copyright (c) 1991 Regents of the University of California. * All rights reserved. * Copyright (c) 1994 John S. Dyson * All rights reserved. * Copyright (c) 1994 David Greenman * All rights reserved. * Copyright (c) 2003 Peter Wemm * All rights reserved. * Copyright (c) 2005-2010 Alan L. Cox * All rights reserved. * Copyright (c) 2014 Andrew Turner * All rights reserved. * Copyright (c) 2014 The FreeBSD Foundation * All rights reserved. * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * This code is derived from software contributed to Berkeley by * the Systems Programming Group of the University of Utah Computer * Science Department and William Jolitz of UUNET Technologies Inc. * * Portions of this software were developed by Andrew Turner under * sponsorship from The FreeBSD Foundation. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 */ /*- * Copyright (c) 2003 Networks Associates Technology, Inc. * All rights reserved. * * This software was developed for the FreeBSD Project by Jake Burkholder, * Safeport Network Services, and Network Associates Laboratories, the * Security Research Division of Network Associates, Inc. under * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA * CHATS research program. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); /* * Manages physical address maps. * * Since the information managed by this module is * also stored by the logical address mapping module, * this module may throw away valid virtual-to-physical * mappings at almost any time. However, invalidations * of virtual-to-physical mappings must be done as * requested. * * In order to cope with hardware architectures which * make virtual-to-physical map invalidates expensive, * this module may delay invalidate or reduced protection * operations until such time as they are actually * necessary. This module is given full information as * to which processors are currently using which maps, * and to when physical maps must be made correct. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define NPDEPG (PAGE_SIZE/(sizeof (pd_entry_t))) #define NUPDE (NPDEPG * NPDEPG) #define NUSERPGTBLS (NUPDE + NPDEPG) #if !defined(DIAGNOSTIC) #ifdef __GNUC_GNU_INLINE__ #define PMAP_INLINE __attribute__((__gnu_inline__)) inline #else #define PMAP_INLINE extern inline #endif #else #define PMAP_INLINE #endif #ifdef PV_STATS #define PV_STAT(x) do { x ; } while (0) #else #define PV_STAT(x) do { } while (0) #endif #define pmap_l2_pindex(v) ((v) >> L2_SHIFT) #define NPV_LIST_LOCKS MAXCPU #define PHYS_TO_PV_LIST_LOCK(pa) \ (&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS]) #define CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa) do { \ struct rwlock **_lockp = (lockp); \ struct rwlock *_new_lock; \ \ _new_lock = PHYS_TO_PV_LIST_LOCK(pa); \ if (_new_lock != *_lockp) { \ if (*_lockp != NULL) \ rw_wunlock(*_lockp); \ *_lockp = _new_lock; \ rw_wlock(*_lockp); \ } \ } while (0) #define CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m) \ CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m)) #define RELEASE_PV_LIST_LOCK(lockp) do { \ struct rwlock **_lockp = (lockp); \ \ if (*_lockp != NULL) { \ rw_wunlock(*_lockp); \ *_lockp = NULL; \ } \ } while (0) #define VM_PAGE_TO_PV_LIST_LOCK(m) \ PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m)) /* The list of all the user pmaps */ LIST_HEAD(pmaplist, pmap); static struct pmaplist allpmaps; static MALLOC_DEFINE(M_VMPMAP, "pmap", "PMAP L1"); struct pmap kernel_pmap_store; vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ vm_offset_t kernel_vm_end = 0; struct msgbuf *msgbufp = NULL; static struct rwlock_padalign pvh_global_lock; /* * Data for the pv entry allocation mechanism */ static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks); static struct mtx pv_chunks_mutex; static struct rwlock pv_list_locks[NPV_LIST_LOCKS]; static void free_pv_chunk(struct pv_chunk *pc); static void free_pv_entry(pmap_t pmap, pv_entry_t pv); static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp); static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp); static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va); static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp); static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva, pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp); static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, struct rwlock **lockp); static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp); static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free); static int pmap_unuse_l3(pmap_t, vm_offset_t, pd_entry_t, struct spglist *); /* * These load the old table data and store the new value. * They need to be atomic as the System MMU may write to the table at * the same time as the CPU. */ #define pmap_load_store(table, entry) atomic_swap_64(table, entry) #define pmap_set(table, mask) atomic_set_64(table, mask) #define pmap_load_clear(table) atomic_swap_64(table, 0) #define pmap_load(table) (*table) /********************/ /* Inline functions */ /********************/ static __inline void pagecopy(void *s, void *d) { memcpy(d, s, PAGE_SIZE); } static __inline void pagezero(void *p) { bzero(p, PAGE_SIZE); } #define pmap_l1_index(va) (((va) >> L1_SHIFT) & Ln_ADDR_MASK) #define pmap_l2_index(va) (((va) >> L2_SHIFT) & Ln_ADDR_MASK) #define pmap_l3_index(va) (((va) >> L3_SHIFT) & Ln_ADDR_MASK) #define PTE_TO_PHYS(pte) ((pte >> PTE_PPN0_S) * PAGE_SIZE) static __inline pd_entry_t * pmap_l1(pmap_t pmap, vm_offset_t va) { return (&pmap->pm_l1[pmap_l1_index(va)]); } static __inline pd_entry_t * pmap_l1_to_l2(pd_entry_t *l1, vm_offset_t va) { vm_paddr_t phys; pd_entry_t *l2; phys = PTE_TO_PHYS(pmap_load(l1)); l2 = (pd_entry_t *)PHYS_TO_DMAP(phys); return (&l2[pmap_l2_index(va)]); } static __inline pd_entry_t * pmap_l2(pmap_t pmap, vm_offset_t va) { pd_entry_t *l1; l1 = pmap_l1(pmap, va); if (l1 == NULL) return (NULL); - if ((pmap_load(l1) & PTE_VALID) == 0) + if ((pmap_load(l1) & PTE_V) == 0) return (NULL); - if ((pmap_load(l1) & PTE_TYPE_M) != (PTE_TYPE_PTR << PTE_TYPE_S)) + if ((pmap_load(l1) & PTE_RX) != 0) return (NULL); return (pmap_l1_to_l2(l1, va)); } static __inline pt_entry_t * pmap_l2_to_l3(pd_entry_t *l2, vm_offset_t va) { vm_paddr_t phys; pt_entry_t *l3; phys = PTE_TO_PHYS(pmap_load(l2)); l3 = (pd_entry_t *)PHYS_TO_DMAP(phys); return (&l3[pmap_l3_index(va)]); } static __inline pt_entry_t * pmap_l3(pmap_t pmap, vm_offset_t va) { pd_entry_t *l2; l2 = pmap_l2(pmap, va); if (l2 == NULL) return (NULL); - if ((pmap_load(l2) & PTE_VALID) == 0) + if ((pmap_load(l2) & PTE_V) == 0) return (NULL); - if ((pmap_load(l2) & PTE_TYPE_M) != (PTE_TYPE_PTR << PTE_TYPE_S)) + if ((pmap_load(l2) & PTE_RX) != 0) return (NULL); return (pmap_l2_to_l3(l2, va)); } static __inline int pmap_is_write(pt_entry_t entry) { - if (entry & (1 << PTE_TYPE_S)) - return (1); - - return (0); + return (entry & PTE_W); } static __inline int pmap_is_current(pmap_t pmap) { return ((pmap == pmap_kernel()) || (pmap == curthread->td_proc->p_vmspace->vm_map.pmap)); } static __inline int pmap_l3_valid(pt_entry_t l3) { - return (l3 & PTE_VALID); + return (l3 & PTE_V); } static __inline int pmap_l3_valid_cacheable(pt_entry_t l3) { /* TODO */ return (0); } #define PTE_SYNC(pte) cpu_dcache_wb_range((vm_offset_t)pte, sizeof(*pte)) /* Checks if the page is dirty. */ static inline int pmap_page_dirty(pt_entry_t pte) { - return (pte & PTE_DIRTY); + return (pte & PTE_D); } static __inline void pmap_resident_count_inc(pmap_t pmap, int count) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); pmap->pm_stats.resident_count += count; } static __inline void pmap_resident_count_dec(pmap_t pmap, int count) { PMAP_LOCK_ASSERT(pmap, MA_OWNED); KASSERT(pmap->pm_stats.resident_count >= count, ("pmap %p resident count underflow %ld %d", pmap, pmap->pm_stats.resident_count, count)); pmap->pm_stats.resident_count -= count; } static void pmap_distribute_l1(struct pmap *pmap, vm_pindex_t l1index, pt_entry_t entry) { struct pmap *user_pmap; pd_entry_t *l1; /* Distribute new kernel L1 entry to all the user pmaps */ if (pmap != kernel_pmap) return; LIST_FOREACH(user_pmap, &allpmaps, pm_list) { l1 = &user_pmap->pm_l1[l1index]; if (entry) pmap_load_store(l1, entry); else pmap_load_clear(l1); } } static pt_entry_t * pmap_early_page_idx(vm_offset_t l1pt, vm_offset_t va, u_int *l1_slot, u_int *l2_slot) { pt_entry_t *l2; pd_entry_t *l1; l1 = (pd_entry_t *)l1pt; *l1_slot = (va >> L1_SHIFT) & Ln_ADDR_MASK; /* Check locore has used a table L1 map */ - KASSERT((l1[*l1_slot] & PTE_TYPE_M) == (PTE_TYPE_PTR << PTE_TYPE_S), + KASSERT((l1[*l1_slot] & PTE_RX) == 0, ("Invalid bootstrap L1 table")); /* Find the address of the L2 table */ l2 = (pt_entry_t *)init_pt_va; *l2_slot = pmap_l2_index(va); return (l2); } static vm_paddr_t pmap_early_vtophys(vm_offset_t l1pt, vm_offset_t va) { u_int l1_slot, l2_slot; pt_entry_t *l2; u_int ret; l2 = pmap_early_page_idx(l1pt, va, &l1_slot, &l2_slot); /* L2 is superpages */ ret = (l2[l2_slot] >> PTE_PPN1_S) << L2_SHIFT; ret += (va & L2_OFFSET); return (ret); } static void pmap_bootstrap_dmap(vm_offset_t l1pt, vm_paddr_t kernstart) { vm_offset_t va; vm_paddr_t pa; pd_entry_t *l1; u_int l1_slot; pt_entry_t entry; pn_t pn; + /* + * Initialize DMAP starting from zero physical address. + * TODO: remove this once machine-mode code splitted out. + */ + kernstart = 0; + printf("%s: l1pt 0x%016lx kernstart 0x%016lx\n", __func__, l1pt, kernstart); + pa = kernstart & ~L1_OFFSET; va = DMAP_MIN_ADDRESS; l1 = (pd_entry_t *)l1pt; l1_slot = pmap_l1_index(DMAP_MIN_ADDRESS); for (; va < DMAP_MAX_ADDRESS; pa += L1_SIZE, va += L1_SIZE, l1_slot++) { KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index")); /* superpages */ pn = (pa / PAGE_SIZE); - entry = (PTE_VALID | (PTE_TYPE_SRWX << PTE_TYPE_S)); + entry = (PTE_V | PTE_RWX); entry |= (pn << PTE_PPN0_S); pmap_load_store(&l1[l1_slot], entry); } cpu_dcache_wb_range((vm_offset_t)l1, PAGE_SIZE); cpu_tlb_flushID(); } static vm_offset_t pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start) { vm_offset_t l2pt, l3pt; pt_entry_t entry; pd_entry_t *l2; vm_paddr_t pa; u_int l2_slot; pn_t pn; KASSERT((va & L2_OFFSET) == 0, ("Invalid virtual address")); l2 = pmap_l2(kernel_pmap, va); l2 = (pd_entry_t *)((uintptr_t)l2 & ~(PAGE_SIZE - 1)); l2pt = (vm_offset_t)l2; l2_slot = pmap_l2_index(va); l3pt = l3_start; for (; va < VM_MAX_KERNEL_ADDRESS; l2_slot++, va += L2_SIZE) { KASSERT(l2_slot < Ln_ENTRIES, ("Invalid L2 index")); pa = pmap_early_vtophys(l1pt, l3pt); pn = (pa / PAGE_SIZE); - entry = (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S)); + entry = (PTE_V); entry |= (pn << PTE_PPN0_S); pmap_load_store(&l2[l2_slot], entry); l3pt += PAGE_SIZE; } + /* Clean the L2 page table */ memset((void *)l3_start, 0, l3pt - l3_start); cpu_dcache_wb_range(l3_start, l3pt - l3_start); cpu_dcache_wb_range((vm_offset_t)l2, PAGE_SIZE); - return l3pt; + return (l3pt); } /* * Bootstrap the system enough to run with virtual memory. */ void pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen) { u_int l1_slot, l2_slot, avail_slot, map_slot, used_map_slot; uint64_t kern_delta; pt_entry_t *l2; vm_offset_t va, freemempos; vm_offset_t dpcpu, msgbufpv; vm_paddr_t pa, min_pa; int i; kern_delta = KERNBASE - kernstart; physmem = 0; printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen); printf("%lx\n", l1pt); printf("%lx\n", (KERNBASE >> L1_SHIFT) & Ln_ADDR_MASK); /* Set this early so we can use the pagetable walking functions */ kernel_pmap_store.pm_l1 = (pd_entry_t *)l1pt; PMAP_LOCK_INIT(kernel_pmap); /* * Initialize the global pv list lock. */ rw_init(&pvh_global_lock, "pmap pv global"); LIST_INIT(&allpmaps); /* Assume the address we were loaded to is a valid physical address */ min_pa = KERNBASE - kern_delta; /* * Find the minimum physical address. physmap is sorted, * but may contain empty ranges. */ for (i = 0; i < (physmap_idx * 2); i += 2) { if (physmap[i] == physmap[i + 1]) continue; if (physmap[i] <= min_pa) min_pa = physmap[i]; break; } /* Create a direct map region early so we can use it for pa -> va */ pmap_bootstrap_dmap(l1pt, min_pa); va = KERNBASE; pa = KERNBASE - kern_delta; /* * Start to initialize phys_avail by copying from physmap * up to the physical address KERNBASE points at. */ map_slot = avail_slot = 0; for (; map_slot < (physmap_idx * 2); map_slot += 2) { if (physmap[map_slot] == physmap[map_slot + 1]) continue; + if (physmap[map_slot] <= pa && + physmap[map_slot + 1] > pa) + break; + phys_avail[avail_slot] = physmap[map_slot]; phys_avail[avail_slot + 1] = physmap[map_slot + 1]; physmem += (phys_avail[avail_slot + 1] - phys_avail[avail_slot]) >> PAGE_SHIFT; avail_slot += 2; } /* Add the memory before the kernel */ if (physmap[avail_slot] < pa) { phys_avail[avail_slot] = physmap[map_slot]; phys_avail[avail_slot + 1] = pa; physmem += (phys_avail[avail_slot + 1] - phys_avail[avail_slot]) >> PAGE_SHIFT; avail_slot += 2; } used_map_slot = map_slot; /* * Read the page table to find out what is already mapped. * This assumes we have mapped a block of memory from KERNBASE * using a single L1 entry. */ l2 = pmap_early_page_idx(l1pt, KERNBASE, &l1_slot, &l2_slot); /* Sanity check the index, KERNBASE should be the first VA */ KASSERT(l2_slot == 0, ("The L2 index is non-zero")); /* Find how many pages we have mapped */ for (; l2_slot < Ln_ENTRIES; l2_slot++) { - if ((l2[l2_slot] & PTE_VALID) == 0) + if ((l2[l2_slot] & PTE_V) == 0) break; /* Check locore used L2 superpages */ - KASSERT((l2[l2_slot] & PTE_TYPE_M) != (PTE_TYPE_PTR << PTE_TYPE_S), + KASSERT((l2[l2_slot] & PTE_RX) != 0, ("Invalid bootstrap L2 table")); va += L2_SIZE; pa += L2_SIZE; } va = roundup2(va, L2_SIZE); freemempos = KERNBASE + kernlen; freemempos = roundup2(freemempos, PAGE_SIZE); /* Create the l3 tables for the early devmap */ freemempos = pmap_bootstrap_l3(l1pt, VM_MAX_KERNEL_ADDRESS - L2_SIZE, freemempos); cpu_tlb_flushID(); #define alloc_pages(var, np) \ (var) = freemempos; \ freemempos += (np * PAGE_SIZE); \ memset((char *)(var), 0, ((np) * PAGE_SIZE)); /* Allocate dynamic per-cpu area. */ alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE); dpcpu_init((void *)dpcpu, 0); /* Allocate memory for the msgbuf, e.g. for /sbin/dmesg */ alloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE); msgbufp = (void *)msgbufpv; virtual_avail = roundup2(freemempos, L2_SIZE); virtual_end = VM_MAX_KERNEL_ADDRESS - L2_SIZE; kernel_vm_end = virtual_avail; pa = pmap_early_vtophys(l1pt, freemempos); /* Finish initialising physmap */ map_slot = used_map_slot; for (; avail_slot < (PHYS_AVAIL_SIZE - 2) && map_slot < (physmap_idx * 2); map_slot += 2) { - if (physmap[map_slot] == physmap[map_slot + 1]) + if (physmap[map_slot] == physmap[map_slot + 1]) { continue; + } /* Have we used the current range? */ - if (physmap[map_slot + 1] <= pa) + if (physmap[map_slot + 1] <= pa) { continue; + } /* Do we need to split the entry? */ if (physmap[map_slot] < pa) { phys_avail[avail_slot] = pa; phys_avail[avail_slot + 1] = physmap[map_slot + 1]; } else { phys_avail[avail_slot] = physmap[map_slot]; phys_avail[avail_slot + 1] = physmap[map_slot + 1]; } physmem += (phys_avail[avail_slot + 1] - phys_avail[avail_slot]) >> PAGE_SHIFT; avail_slot += 2; } phys_avail[avail_slot] = 0; phys_avail[avail_slot + 1] = 0; /* * Maxmem isn't the "maximum memory", it's one larger than the * highest page of the physical address space. It should be * called something like "Maxphyspage". */ Maxmem = atop(phys_avail[avail_slot - 1]); cpu_tlb_flushID(); } /* * Initialize a vm_page's machine-dependent fields. */ void pmap_page_init(vm_page_t m) { TAILQ_INIT(&m->md.pv_list); m->md.pv_memattr = VM_MEMATTR_WRITE_BACK; } /* * Initialize the pmap module. * Called by vm_init, to initialize any structures that the pmap * system needs to map virtual memory. */ void pmap_init(void) { int i; /* * Initialize the pv chunk list mutex. */ mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF); /* * Initialize the pool of pv list locks. */ for (i = 0; i < NPV_LIST_LOCKS; i++) rw_init(&pv_list_locks[i], "pmap pv list"); } /* * Normal, non-SMP, invalidation functions. * We inline these within pmap.c for speed. */ PMAP_INLINE void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { /* TODO */ sched_pin(); __asm __volatile("sfence.vm"); sched_unpin(); } PMAP_INLINE void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { /* TODO */ sched_pin(); __asm __volatile("sfence.vm"); sched_unpin(); } PMAP_INLINE void pmap_invalidate_all(pmap_t pmap) { /* TODO */ sched_pin(); __asm __volatile("sfence.vm"); sched_unpin(); } /* * Routine: pmap_extract * Function: * Extract the physical page address associated * with the given map/virtual_address pair. */ vm_paddr_t pmap_extract(pmap_t pmap, vm_offset_t va) { pd_entry_t *l2p, l2; pt_entry_t *l3p, l3; vm_paddr_t pa; pa = 0; PMAP_LOCK(pmap); /* * Start with the l2 tabel. We are unable to allocate * pages in the l1 table. */ l2p = pmap_l2(pmap, va); if (l2p != NULL) { l2 = pmap_load(l2p); - if ((l2 & PTE_TYPE_M) == (PTE_TYPE_PTR << PTE_TYPE_S)) { + if ((l2 & PTE_RX) == 0) { l3p = pmap_l2_to_l3(l2p, va); if (l3p != NULL) { l3 = pmap_load(l3p); pa = PTE_TO_PHYS(l3); pa |= (va & L3_OFFSET); } } else { /* L2 is superpages */ pa = (l2 >> PTE_PPN1_S) << L2_SHIFT; pa |= (va & L2_OFFSET); } } PMAP_UNLOCK(pmap); return (pa); } /* * Routine: pmap_extract_and_hold * Function: * Atomically extract and hold the physical page * with the given pmap and virtual address pair * if that mapping permits the given protection. */ vm_page_t pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { pt_entry_t *l3p, l3; vm_paddr_t phys; vm_paddr_t pa; vm_page_t m; pa = 0; m = NULL; PMAP_LOCK(pmap); retry: l3p = pmap_l3(pmap, va); if (l3p != NULL && (l3 = pmap_load(l3p)) != 0) { if ((pmap_is_write(l3)) || ((prot & VM_PROT_WRITE) == 0)) { phys = PTE_TO_PHYS(l3); if (vm_page_pa_tryrelock(pmap, phys, &pa)) goto retry; m = PHYS_TO_VM_PAGE(phys); vm_page_hold(m); } } PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } vm_paddr_t pmap_kextract(vm_offset_t va) { pd_entry_t *l2; pt_entry_t *l3; vm_paddr_t pa; if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) { pa = DMAP_TO_PHYS(va); } else { l2 = pmap_l2(kernel_pmap, va); if (l2 == NULL) panic("pmap_kextract: No l2"); - if ((pmap_load(l2) & PTE_TYPE_M) != (PTE_TYPE_PTR << PTE_TYPE_S)) { + if ((pmap_load(l2) & PTE_RX) != 0) { /* superpages */ pa = (pmap_load(l2) >> PTE_PPN1_S) << L2_SHIFT; pa |= (va & L2_OFFSET); return (pa); } l3 = pmap_l2_to_l3(l2, va); if (l3 == NULL) panic("pmap_kextract: No l3..."); pa = PTE_TO_PHYS(pmap_load(l3)); pa |= (va & PAGE_MASK); } return (pa); } /*************************************************** * Low level mapping routines..... ***************************************************/ void pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa) { pt_entry_t entry; pt_entry_t *l3; vm_offset_t va; pn_t pn; KASSERT((pa & L3_OFFSET) == 0, ("pmap_kenter_device: Invalid physical address")); KASSERT((sva & L3_OFFSET) == 0, ("pmap_kenter_device: Invalid virtual address")); KASSERT((size & PAGE_MASK) == 0, ("pmap_kenter_device: Mapping is not page-sized")); va = sva; while (size != 0) { l3 = pmap_l3(kernel_pmap, va); KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va)); pn = (pa / PAGE_SIZE); - entry = (PTE_VALID | (PTE_TYPE_SRWX << PTE_TYPE_S)); + entry = (PTE_V | PTE_RWX); entry |= (pn << PTE_PPN0_S); pmap_load_store(l3, entry); PTE_SYNC(l3); va += PAGE_SIZE; pa += PAGE_SIZE; size -= PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } /* * Remove a page from the kernel pagetables. * Note: not SMP coherent. */ PMAP_INLINE void pmap_kremove(vm_offset_t va) { pt_entry_t *l3; l3 = pmap_l3(kernel_pmap, va); KASSERT(l3 != NULL, ("pmap_kremove: Invalid address")); if (pmap_l3_valid_cacheable(pmap_load(l3))) cpu_dcache_wb_range(va, L3_SIZE); pmap_load_clear(l3); PTE_SYNC(l3); pmap_invalidate_page(kernel_pmap, va); } void pmap_kremove_device(vm_offset_t sva, vm_size_t size) { pt_entry_t *l3; vm_offset_t va; KASSERT((sva & L3_OFFSET) == 0, ("pmap_kremove_device: Invalid virtual address")); KASSERT((size & PAGE_MASK) == 0, ("pmap_kremove_device: Mapping is not page-sized")); va = sva; while (size != 0) { l3 = pmap_l3(kernel_pmap, va); KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va)); pmap_load_clear(l3); PTE_SYNC(l3); va += PAGE_SIZE; size -= PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } /* * Used to map a range of physical addresses into kernel * virtual address space. * * The value passed in '*virt' is a suggested virtual address for * the mapping. Architectures which can support a direct-mapped * physical to virtual region can return the appropriate address * within that region, leaving '*virt' unchanged. Other * architectures should map the pages starting at '*virt' and * update '*virt' with the first usable address after the mapped * region. */ vm_offset_t pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) { return PHYS_TO_DMAP(start); } /* * Add a list of wired pages to the kva * this routine is only used for temporary * kernel mappings that do not need to have * page modification or references recorded. * Note that old mappings are simply written * over. The page *must* be wired. * Note: SMP coherent. Uses a ranged shootdown IPI. */ void pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) { pt_entry_t *l3, pa; vm_offset_t va; vm_page_t m; pt_entry_t entry; pn_t pn; int i; va = sva; for (i = 0; i < count; i++) { m = ma[i]; pa = VM_PAGE_TO_PHYS(m); pn = (pa / PAGE_SIZE); l3 = pmap_l3(kernel_pmap, va); - entry = (PTE_VALID | (PTE_TYPE_SRWX << PTE_TYPE_S)); + entry = (PTE_V | PTE_RWX); entry |= (pn << PTE_PPN0_S); pmap_load_store(l3, entry); PTE_SYNC(l3); va += L3_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } /* * This routine tears out page mappings from the * kernel -- it is meant only for temporary mappings. * Note: SMP coherent. Uses a ranged shootdown IPI. */ void pmap_qremove(vm_offset_t sva, int count) { pt_entry_t *l3; vm_offset_t va; KASSERT(sva >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", sva)); va = sva; while (count-- > 0) { l3 = pmap_l3(kernel_pmap, va); KASSERT(l3 != NULL, ("pmap_kremove: Invalid address")); if (pmap_l3_valid_cacheable(pmap_load(l3))) cpu_dcache_wb_range(va, L3_SIZE); pmap_load_clear(l3); PTE_SYNC(l3); va += PAGE_SIZE; } pmap_invalidate_range(kernel_pmap, sva, va); } /*************************************************** * Page table page management routines..... ***************************************************/ static __inline void pmap_free_zero_pages(struct spglist *free) { vm_page_t m; while ((m = SLIST_FIRST(free)) != NULL) { SLIST_REMOVE_HEAD(free, plinks.s.ss); /* Preserve the page's PG_ZERO setting. */ vm_page_free_toq(m); } } /* * Schedule the specified unused page table page to be freed. Specifically, * add the page to the specified list of pages that will be released to the * physical memory manager after the TLB has been updated. */ static __inline void pmap_add_delayed_free_list(vm_page_t m, struct spglist *free, boolean_t set_PG_ZERO) { if (set_PG_ZERO) m->flags |= PG_ZERO; else m->flags &= ~PG_ZERO; SLIST_INSERT_HEAD(free, m, plinks.s.ss); } /* * Decrements a page table page's wire count, which is used to record the * number of valid page table entries within the page. If the wire count * drops to zero, then the page table page is unmapped. Returns TRUE if the * page table page was unmapped and FALSE otherwise. */ static inline boolean_t pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) { --m->wire_count; if (m->wire_count == 0) { _pmap_unwire_l3(pmap, va, m, free); return (TRUE); } else { return (FALSE); } } static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) { vm_paddr_t phys; PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * unmap the page table page */ if (m->pindex >= NUPDE) { /* PD page */ pd_entry_t *l1; l1 = pmap_l1(pmap, va); pmap_load_clear(l1); pmap_distribute_l1(pmap, pmap_l1_index(va), 0); PTE_SYNC(l1); } else { /* PTE page */ pd_entry_t *l2; l2 = pmap_l2(pmap, va); pmap_load_clear(l2); PTE_SYNC(l2); } pmap_resident_count_dec(pmap, 1); if (m->pindex < NUPDE) { pd_entry_t *l1; /* We just released a PT, unhold the matching PD */ vm_page_t pdpg; l1 = pmap_l1(pmap, va); phys = PTE_TO_PHYS(pmap_load(l1)); pdpg = PHYS_TO_VM_PAGE(phys); pmap_unwire_l3(pmap, va, pdpg, free); } pmap_invalidate_page(pmap, va); /* * This is a release store so that the ordinary store unmapping * the page table page is globally performed before TLB shoot- * down is begun. */ atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1); /* * Put page on a list so that it is released after * *ALL* TLB shootdown is done */ pmap_add_delayed_free_list(m, free, TRUE); } /* * After removing an l3 entry, this routine is used to * conditionally free the page, and manage the hold/wire counts. */ static int pmap_unuse_l3(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde, struct spglist *free) { vm_paddr_t phys; vm_page_t mpte; if (va >= VM_MAXUSER_ADDRESS) return (0); KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0")); phys = PTE_TO_PHYS(ptepde); mpte = PHYS_TO_VM_PAGE(phys); return (pmap_unwire_l3(pmap, va, mpte, free)); } void pmap_pinit0(pmap_t pmap) { PMAP_LOCK_INIT(pmap); bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); pmap->pm_l1 = kernel_pmap->pm_l1; } int pmap_pinit(pmap_t pmap) { vm_paddr_t l1phys; vm_page_t l1pt; /* * allocate the l1 page */ while ((l1pt = vm_page_alloc(NULL, 0xdeadbeef, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) VM_WAIT; l1phys = VM_PAGE_TO_PHYS(l1pt); pmap->pm_l1 = (pd_entry_t *)PHYS_TO_DMAP(l1phys); if ((l1pt->flags & PG_ZERO) == 0) pagezero(pmap->pm_l1); bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); /* Install kernel pagetables */ memcpy(pmap->pm_l1, kernel_pmap->pm_l1, PAGE_SIZE); /* Add to the list of all user pmaps */ LIST_INSERT_HEAD(&allpmaps, pmap, pm_list); return (1); } /* * This routine is called if the desired page table page does not exist. * * If page table page allocation fails, this routine may sleep before * returning NULL. It sleeps only if a lock pointer was given. * * Note: If a page allocation fails at page table level two or three, * one or two pages may be held during the wait, only to be released * afterwards. This conservative approach is easily argued to avoid * race conditions. */ static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp) { vm_page_t m, /*pdppg, */pdpg; pt_entry_t entry; vm_paddr_t phys; pn_t pn; PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* * Allocate a page table page. */ if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { if (lockp != NULL) { RELEASE_PV_LIST_LOCK(lockp); PMAP_UNLOCK(pmap); rw_runlock(&pvh_global_lock); VM_WAIT; rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); } /* * Indicate the need to retry. While waiting, the page table * page may have been allocated. */ return (NULL); } if ((m->flags & PG_ZERO) == 0) pmap_zero_page(m); /* * Map the pagetable page into the process address space, if * it isn't already there. */ if (ptepindex >= NUPDE) { pd_entry_t *l1; vm_pindex_t l1index; l1index = ptepindex - NUPDE; l1 = &pmap->pm_l1[l1index]; pn = (VM_PAGE_TO_PHYS(m) / PAGE_SIZE); - entry = (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S)); + entry = (PTE_V); entry |= (pn << PTE_PPN0_S); pmap_load_store(l1, entry); pmap_distribute_l1(pmap, l1index, entry); PTE_SYNC(l1); } else { vm_pindex_t l1index; pd_entry_t *l1, *l2; l1index = ptepindex >> (L1_SHIFT - L2_SHIFT); l1 = &pmap->pm_l1[l1index]; if (pmap_load(l1) == 0) { /* recurse for allocating page dir */ if (_pmap_alloc_l3(pmap, NUPDE + l1index, lockp) == NULL) { --m->wire_count; atomic_subtract_int(&vm_cnt.v_wire_count, 1); vm_page_free_zero(m); return (NULL); } } else { phys = PTE_TO_PHYS(pmap_load(l1)); pdpg = PHYS_TO_VM_PAGE(phys); pdpg->wire_count++; } phys = PTE_TO_PHYS(pmap_load(l1)); l2 = (pd_entry_t *)PHYS_TO_DMAP(phys); l2 = &l2[ptepindex & Ln_ADDR_MASK]; pn = (VM_PAGE_TO_PHYS(m) / PAGE_SIZE); - entry = (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S)); + entry = (PTE_V); entry |= (pn << PTE_PPN0_S); pmap_load_store(l2, entry); PTE_SYNC(l2); } pmap_resident_count_inc(pmap, 1); return (m); } static vm_page_t pmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp) { vm_pindex_t ptepindex; pd_entry_t *l2; vm_paddr_t phys; vm_page_t m; /* * Calculate pagetable page index */ ptepindex = pmap_l2_pindex(va); retry: /* * Get the page directory entry */ l2 = pmap_l2(pmap, va); /* * If the page table page is mapped, we just increment the * hold count, and activate it. */ if (l2 != NULL && pmap_load(l2) != 0) { phys = PTE_TO_PHYS(pmap_load(l2)); m = PHYS_TO_VM_PAGE(phys); m->wire_count++; } else { /* * Here if the pte page isn't mapped, or if it has been * deallocated. */ m = _pmap_alloc_l3(pmap, ptepindex, lockp); if (m == NULL && lockp != NULL) goto retry; } return (m); } /*************************************************** * Pmap allocation/deallocation routines. ***************************************************/ /* * Release any resources held by the given physical map. * Called when a pmap initialized by pmap_pinit is being released. * Should only be called if the map contains no valid mappings. */ void pmap_release(pmap_t pmap) { vm_page_t m; KASSERT(pmap->pm_stats.resident_count == 0, ("pmap_release: pmap resident count %ld != 0", pmap->pm_stats.resident_count)); m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l1)); m->wire_count--; atomic_subtract_int(&vm_cnt.v_wire_count, 1); vm_page_free_zero(m); /* Remove pmap from the allpmaps list */ LIST_REMOVE(pmap, pm_list); /* Remove kernel pagetables */ bzero(pmap->pm_l1, PAGE_SIZE); } #if 0 static int kvm_size(SYSCTL_HANDLER_ARGS) { unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS; return sysctl_handle_long(oidp, &ksize, 0, req); } SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_size, "LU", "Size of KVM"); static int kvm_free(SYSCTL_HANDLER_ARGS) { unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; return sysctl_handle_long(oidp, &kfree, 0, req); } SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 0, 0, kvm_free, "LU", "Amount of KVM free"); #endif /* 0 */ /* * grow the number of kernel page table entries, if needed */ void pmap_growkernel(vm_offset_t addr) { vm_paddr_t paddr; vm_page_t nkpg; pd_entry_t *l1, *l2; pt_entry_t entry; pn_t pn; mtx_assert(&kernel_map->system_mtx, MA_OWNED); addr = roundup2(addr, L2_SIZE); if (addr - 1 >= kernel_map->max_offset) addr = kernel_map->max_offset; while (kernel_vm_end < addr) { l1 = pmap_l1(kernel_pmap, kernel_vm_end); if (pmap_load(l1) == 0) { /* We need a new PDP entry */ nkpg = vm_page_alloc(NULL, kernel_vm_end >> L1_SHIFT, VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (nkpg == NULL) panic("pmap_growkernel: no memory to grow kernel"); if ((nkpg->flags & PG_ZERO) == 0) pmap_zero_page(nkpg); paddr = VM_PAGE_TO_PHYS(nkpg); pn = (paddr / PAGE_SIZE); - entry = (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S)); + entry = (PTE_V); entry |= (pn << PTE_PPN0_S); pmap_load_store(l1, entry); pmap_distribute_l1(kernel_pmap, pmap_l1_index(kernel_vm_end), entry); PTE_SYNC(l1); continue; /* try again */ } l2 = pmap_l1_to_l2(l1, kernel_vm_end); - if ((pmap_load(l2) & PTE_REF) != 0) { + if ((pmap_load(l2) & PTE_A) != 0) { kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET; if (kernel_vm_end - 1 >= kernel_map->max_offset) { kernel_vm_end = kernel_map->max_offset; break; } continue; } nkpg = vm_page_alloc(NULL, kernel_vm_end >> L2_SHIFT, VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (nkpg == NULL) panic("pmap_growkernel: no memory to grow kernel"); - if ((nkpg->flags & PG_ZERO) == 0) + if ((nkpg->flags & PG_ZERO) == 0) { pmap_zero_page(nkpg); + } paddr = VM_PAGE_TO_PHYS(nkpg); pn = (paddr / PAGE_SIZE); - entry = (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S)); + entry = (PTE_V); entry |= (pn << PTE_PPN0_S); pmap_load_store(l2, entry); PTE_SYNC(l2); pmap_invalidate_page(kernel_pmap, kernel_vm_end); kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET; if (kernel_vm_end - 1 >= kernel_map->max_offset) { kernel_vm_end = kernel_map->max_offset; break; } } } /*************************************************** * page management routines. ***************************************************/ CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); CTASSERT(_NPCM == 3); CTASSERT(_NPCPV == 168); static __inline struct pv_chunk * pv_to_chunk(pv_entry_t pv) { return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); } #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) #define PC_FREE0 0xfffffffffffffffful #define PC_FREE1 0xfffffffffffffffful #define PC_FREE2 0x000000fffffffffful static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 }; #if 0 #ifdef PV_STATS static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, "Current number of pv entry chunks"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, "Current number of pv entry chunks allocated"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, "Current number of pv entry chunks frees"); SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, "Number of times tried to get a chunk page but failed."); static long pv_entry_frees, pv_entry_allocs, pv_entry_count; static int pv_entry_spare; SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, "Current number of pv entry frees"); SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, "Current number of pv entry allocs"); SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, "Current number of pv entries"); SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, "Current number of spare pv entries"); #endif #endif /* 0 */ /* * We are in a serious low memory condition. Resort to * drastic measures to free some pages so we can allocate * another pv entry chunk. * * Returns NULL if PV entries were reclaimed from the specified pmap. * * We do not, however, unmap 2mpages because subsequent accesses will * allocate per-page pv entries until repromotion occurs, thereby * exacerbating the shortage of free pv entries. */ static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp) { panic("RISCVTODO: reclaim_pv_chunk"); } /* * free the pv_entry back to the free list */ static void free_pv_entry(pmap_t pmap, pv_entry_t pv) { struct pv_chunk *pc; int idx, field, bit; rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); PV_STAT(atomic_add_long(&pv_entry_frees, 1)); PV_STAT(atomic_add_int(&pv_entry_spare, 1)); PV_STAT(atomic_subtract_long(&pv_entry_count, 1)); pc = pv_to_chunk(pv); idx = pv - &pc->pc_pventry[0]; field = idx / 64; bit = idx % 64; pc->pc_map[field] |= 1ul << bit; if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 || pc->pc_map[2] != PC_FREE2) { /* 98% of the time, pc is already at the head of the list. */ if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); } return; } TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); free_pv_chunk(pc); } static void free_pv_chunk(struct pv_chunk *pc) { vm_page_t m; mtx_lock(&pv_chunks_mutex); TAILQ_REMOVE(&pv_chunks, pc, pc_lru); mtx_unlock(&pv_chunks_mutex); PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV)); PV_STAT(atomic_subtract_int(&pc_chunk_count, 1)); PV_STAT(atomic_add_int(&pc_chunk_frees, 1)); /* entire chunk is free, return it */ m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); #if 0 /* TODO: For minidump */ dump_drop_page(m->phys_addr); #endif vm_page_unwire(m, PQ_INACTIVE); vm_page_free(m); } /* * Returns a new PV entry, allocating a new PV chunk from the system when * needed. If this PV chunk allocation fails and a PV list lock pointer was * given, a PV chunk is reclaimed from an arbitrary pmap. Otherwise, NULL is * returned. * * The given PV list lock may be released. */ static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp) { int bit, field; pv_entry_t pv; struct pv_chunk *pc; vm_page_t m; rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); PV_STAT(atomic_add_long(&pv_entry_allocs, 1)); retry: pc = TAILQ_FIRST(&pmap->pm_pvchunk); if (pc != NULL) { for (field = 0; field < _NPCM; field++) { if (pc->pc_map[field]) { bit = ffsl(pc->pc_map[field]) - 1; break; } } if (field < _NPCM) { pv = &pc->pc_pventry[field * 64 + bit]; pc->pc_map[field] &= ~(1ul << bit); /* If this was the last item, move it to tail */ if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && pc->pc_map[2] == 0) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); } PV_STAT(atomic_add_long(&pv_entry_count, 1)); PV_STAT(atomic_subtract_int(&pv_entry_spare, 1)); return (pv); } } /* No free items, allocate another chunk */ m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED); if (m == NULL) { if (lockp == NULL) { PV_STAT(pc_chunk_tryfail++); return (NULL); } m = reclaim_pv_chunk(pmap, lockp); if (m == NULL) goto retry; } PV_STAT(atomic_add_int(&pc_chunk_count, 1)); PV_STAT(atomic_add_int(&pc_chunk_allocs, 1)); #if 0 /* TODO: This is for minidump */ dump_add_page(m->phys_addr); #endif pc = (void *)PHYS_TO_DMAP(m->phys_addr); pc->pc_pmap = pmap; pc->pc_map[0] = PC_FREE0 & ~1ul; /* preallocated bit 0 */ pc->pc_map[1] = PC_FREE1; pc->pc_map[2] = PC_FREE2; mtx_lock(&pv_chunks_mutex); TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); mtx_unlock(&pv_chunks_mutex); pv = &pc->pc_pventry[0]; TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); PV_STAT(atomic_add_long(&pv_entry_count, 1)); PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1)); return (pv); } /* * First find and then remove the pv entry for the specified pmap and virtual * address from the specified pv list. Returns the pv entry if found and NULL * otherwise. This operation can be performed on pv lists for either 4KB or * 2MB page mappings. */ static __inline pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; rw_assert(&pvh_global_lock, RA_LOCKED); TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { if (pmap == PV_PMAP(pv) && va == pv->pv_va) { TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); pvh->pv_gen++; break; } } return (pv); } /* * First find and then destroy the pv entry for the specified pmap and virtual * address. This operation can be performed on pv lists for either 4KB or 2MB * page mappings. */ static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) { pv_entry_t pv; pv = pmap_pvh_remove(pvh, pmap, va); KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); free_pv_entry(pmap, pv); } /* * Conditionally create the PV entry for a 4KB page mapping if the required * memory can be allocated without resorting to reclamation. */ static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, struct rwlock **lockp) { pv_entry_t pv; rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); /* Pass NULL instead of the lock pointer to disable reclamation. */ if ((pv = get_pv_entry(pmap, NULL)) != NULL) { pv->pv_va = va; CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; return (TRUE); } else return (FALSE); } /* * pmap_remove_l3: do the things to unmap a page in a process */ static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va, pd_entry_t l2e, struct spglist *free, struct rwlock **lockp) { pt_entry_t old_l3; vm_paddr_t phys; vm_page_t m; PMAP_LOCK_ASSERT(pmap, MA_OWNED); if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(pmap_load(l3))) cpu_dcache_wb_range(va, L3_SIZE); old_l3 = pmap_load_clear(l3); PTE_SYNC(l3); pmap_invalidate_page(pmap, va); if (old_l3 & PTE_SW_WIRED) pmap->pm_stats.wired_count -= 1; pmap_resident_count_dec(pmap, 1); if (old_l3 & PTE_SW_MANAGED) { phys = PTE_TO_PHYS(old_l3); m = PHYS_TO_VM_PAGE(phys); if (pmap_page_dirty(old_l3)) vm_page_dirty(m); - if (old_l3 & PTE_REF) + if (old_l3 & PTE_A) vm_page_aflag_set(m, PGA_REFERENCED); CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); pmap_pvh_free(&m->md, pmap, va); } return (pmap_unuse_l3(pmap, va, l2e, free)); } /* * Remove the given range of addresses from the specified map. * * It is assumed that the start and end are properly * rounded to the page size. */ void pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { struct rwlock *lock; vm_offset_t va, va_next; pd_entry_t *l1, *l2; pt_entry_t l3_pte, *l3; struct spglist free; int anyvalid; /* * Perform an unsynchronized read. This is, however, safe. */ if (pmap->pm_stats.resident_count == 0) return; anyvalid = 0; SLIST_INIT(&free); rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); lock = NULL; for (; sva < eva; sva = va_next) { if (pmap->pm_stats.resident_count == 0) break; l1 = pmap_l1(pmap, sva); if (pmap_load(l1) == 0) { va_next = (sva + L1_SIZE) & ~L1_OFFSET; if (va_next < sva) va_next = eva; continue; } /* * Calculate index for next page table. */ va_next = (sva + L2_SIZE) & ~L2_OFFSET; if (va_next < sva) va_next = eva; l2 = pmap_l1_to_l2(l1, sva); if (l2 == NULL) continue; l3_pte = pmap_load(l2); /* * Weed out invalid mappings. */ if (l3_pte == 0) continue; - if ((pmap_load(l2) & PTE_TYPE_M) != (PTE_TYPE_PTR << PTE_TYPE_S)) + if ((pmap_load(l2) & PTE_RX) != 0) continue; /* * Limit our scan to either the end of the va represented * by the current page table page, or to the end of the * range being removed. */ if (va_next > eva) va_next = eva; va = va_next; for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, sva += L3_SIZE) { if (l3 == NULL) panic("l3 == NULL"); if (pmap_load(l3) == 0) { if (va != va_next) { pmap_invalidate_range(pmap, va, sva); va = va_next; } continue; } if (va == va_next) va = sva; if (pmap_remove_l3(pmap, l3, sva, l3_pte, &free, &lock)) { sva += L3_SIZE; break; } } if (va != va_next) pmap_invalidate_range(pmap, va, sva); } if (lock != NULL) rw_wunlock(lock); if (anyvalid) pmap_invalidate_all(pmap); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); pmap_free_zero_pages(&free); } /* * Routine: pmap_remove_all * Function: * Removes this physical page from * all physical maps in which it resides. * Reflects back modify bits to the pager. * * Notes: * Original versions of this routine were very * inefficient because they iteratively called * pmap_remove (slow...) */ void pmap_remove_all(vm_page_t m) { pv_entry_t pv; pmap_t pmap; pt_entry_t *l3, tl3; pd_entry_t *l2, tl2; struct spglist free; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_all: page %p is not managed", m)); SLIST_INIT(&free); rw_wlock(&pvh_global_lock); while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { pmap = PV_PMAP(pv); PMAP_LOCK(pmap); pmap_resident_count_dec(pmap, 1); l2 = pmap_l2(pmap, pv->pv_va); KASSERT(l2 != NULL, ("pmap_remove_all: no l2 table found")); tl2 = pmap_load(l2); - KASSERT((tl2 & PTE_TYPE_M) == (PTE_TYPE_PTR << PTE_TYPE_S), + KASSERT((tl2 & PTE_RX) == 0, ("pmap_remove_all: found a table when expecting " "a block in %p's pv list", m)); l3 = pmap_l2_to_l3(l2, pv->pv_va); if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(pmap_load(l3))) cpu_dcache_wb_range(pv->pv_va, L3_SIZE); tl3 = pmap_load_clear(l3); PTE_SYNC(l3); pmap_invalidate_page(pmap, pv->pv_va); if (tl3 & PTE_SW_WIRED) pmap->pm_stats.wired_count--; - if ((tl3 & PTE_REF) != 0) + if ((tl3 & PTE_A) != 0) vm_page_aflag_set(m, PGA_REFERENCED); /* * Update the vm_page_t clean and reference bits. */ if (pmap_page_dirty(tl3)) vm_page_dirty(m); pmap_unuse_l3(pmap, pv->pv_va, pmap_load(l2), &free); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; free_pv_entry(pmap, pv); PMAP_UNLOCK(pmap); } vm_page_aflag_clear(m, PGA_WRITEABLE); rw_wunlock(&pvh_global_lock); pmap_free_zero_pages(&free); } /* * Set the physical protection on the * specified range of this map as requested. */ void pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { vm_offset_t va, va_next; pd_entry_t *l1, *l2; pt_entry_t *l3p, l3; pt_entry_t entry; if ((prot & VM_PROT_READ) == VM_PROT_NONE) { pmap_remove(pmap, sva, eva); return; } if ((prot & VM_PROT_WRITE) == VM_PROT_WRITE) return; PMAP_LOCK(pmap); for (; sva < eva; sva = va_next) { l1 = pmap_l1(pmap, sva); if (pmap_load(l1) == 0) { va_next = (sva + L1_SIZE) & ~L1_OFFSET; if (va_next < sva) va_next = eva; continue; } va_next = (sva + L2_SIZE) & ~L2_OFFSET; if (va_next < sva) va_next = eva; l2 = pmap_l1_to_l2(l1, sva); if (l2 == NULL) continue; - if ((pmap_load(l2) & PTE_TYPE_M) != (PTE_TYPE_PTR << PTE_TYPE_S)) + if ((pmap_load(l2) & PTE_RX) != 0) continue; if (va_next > eva) va_next = eva; va = va_next; for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++, sva += L3_SIZE) { l3 = pmap_load(l3p); if (pmap_l3_valid(l3)) { entry = pmap_load(l3p); - entry &= ~(1 << PTE_TYPE_S); + entry &= ~(PTE_W); pmap_load_store(l3p, entry); PTE_SYNC(l3p); /* XXX: Use pmap_invalidate_range */ pmap_invalidate_page(pmap, va); } } } PMAP_UNLOCK(pmap); /* TODO: Only invalidate entries we are touching */ pmap_invalidate_all(pmap); } /* * Insert the given physical page (p) at * the specified virtual address (v) in the * target physical map with the protection requested. * * If specified, the page will be wired down, meaning * that the related pte can not be reclaimed. * * NB: This is the only routine which MAY NOT lazy-evaluate * or lose information. That is, this routine must actually * insert this page into the given map NOW. */ int pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, u_int flags, int8_t psind __unused) { struct rwlock *lock; pd_entry_t *l1, *l2; pt_entry_t new_l3, orig_l3; pt_entry_t *l3; pv_entry_t pv; vm_paddr_t opa, pa, l2_pa, l3_pa; vm_page_t mpte, om, l2_m, l3_m; boolean_t nosleep; pt_entry_t entry; pn_t l2_pn; pn_t l3_pn; pn_t pn; va = trunc_page(va); if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) VM_OBJECT_ASSERT_LOCKED(m->object); pa = VM_PAGE_TO_PHYS(m); pn = (pa / PAGE_SIZE); - new_l3 = PTE_VALID; + new_l3 = PTE_V | PTE_R | PTE_X; + if (prot & VM_PROT_WRITE) + new_l3 |= PTE_W; + if ((va >> 63) == 0) + new_l3 |= PTE_U; - if ((prot & VM_PROT_WRITE) == 0) { /* Read-only */ - if ((va >> 63) == 0) /* USER */ - new_l3 |= (PTE_TYPE_SURX << PTE_TYPE_S); - else /* KERNEL */ - new_l3 |= (PTE_TYPE_SRX << PTE_TYPE_S); - } else { - if ((va >> 63) == 0) /* USER */ - new_l3 |= (PTE_TYPE_SURWX << PTE_TYPE_S); - else /* KERNEL */ - new_l3 |= (PTE_TYPE_SRWX << PTE_TYPE_S); - } - new_l3 |= (pn << PTE_PPN0_S); if ((flags & PMAP_ENTER_WIRED) != 0) new_l3 |= PTE_SW_WIRED; CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa); mpte = NULL; lock = NULL; rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); if (va < VM_MAXUSER_ADDRESS) { nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0; mpte = pmap_alloc_l3(pmap, va, nosleep ? NULL : &lock); if (mpte == NULL && nosleep) { CTR0(KTR_PMAP, "pmap_enter: mpte == NULL"); if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); return (KERN_RESOURCE_SHORTAGE); } l3 = pmap_l3(pmap, va); } else { l3 = pmap_l3(pmap, va); /* TODO: This is not optimal, but should mostly work */ if (l3 == NULL) { l2 = pmap_l2(pmap, va); if (l2 == NULL) { l2_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (l2_m == NULL) panic("pmap_enter: l2 pte_m == NULL"); if ((l2_m->flags & PG_ZERO) == 0) pmap_zero_page(l2_m); l2_pa = VM_PAGE_TO_PHYS(l2_m); l2_pn = (l2_pa / PAGE_SIZE); l1 = pmap_l1(pmap, va); - entry = (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S)); + entry = (PTE_V); entry |= (l2_pn << PTE_PPN0_S); pmap_load_store(l1, entry); pmap_distribute_l1(pmap, pmap_l1_index(va), entry); PTE_SYNC(l1); l2 = pmap_l1_to_l2(l1, va); } KASSERT(l2 != NULL, ("No l2 table after allocating one")); l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); if (l3_m == NULL) panic("pmap_enter: l3 pte_m == NULL"); if ((l3_m->flags & PG_ZERO) == 0) pmap_zero_page(l3_m); l3_pa = VM_PAGE_TO_PHYS(l3_m); l3_pn = (l3_pa / PAGE_SIZE); - entry = (PTE_VALID | (PTE_TYPE_PTR << PTE_TYPE_S)); + entry = (PTE_V); entry |= (l3_pn << PTE_PPN0_S); pmap_load_store(l2, entry); PTE_SYNC(l2); l3 = pmap_l2_to_l3(l2, va); } pmap_invalidate_page(pmap, va); } om = NULL; orig_l3 = pmap_load(l3); opa = PTE_TO_PHYS(orig_l3); /* * Is the specified virtual address already mapped? */ if (pmap_l3_valid(orig_l3)) { /* * Wiring change, just update stats. We don't worry about * wiring PT pages as they remain resident as long as there * are valid mappings in them. Hence, if a user page is wired, * the PT page will be also. */ if ((flags & PMAP_ENTER_WIRED) != 0 && (orig_l3 & PTE_SW_WIRED) == 0) pmap->pm_stats.wired_count++; else if ((flags & PMAP_ENTER_WIRED) == 0 && (orig_l3 & PTE_SW_WIRED) != 0) pmap->pm_stats.wired_count--; /* * Remove the extra PT page reference. */ if (mpte != NULL) { mpte->wire_count--; KASSERT(mpte->wire_count > 0, ("pmap_enter: missing reference to page table page," " va: 0x%lx", va)); } /* * Has the physical page changed? */ if (opa == pa) { /* * No, might be a protection or wiring change. */ if ((orig_l3 & PTE_SW_MANAGED) != 0) { new_l3 |= PTE_SW_MANAGED; if (pmap_is_write(new_l3)) vm_page_aflag_set(m, PGA_WRITEABLE); } goto validate; } /* Flush the cache, there might be uncommitted data in it */ if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(orig_l3)) cpu_dcache_wb_range(va, L3_SIZE); } else { /* * Increment the counters. */ if ((new_l3 & PTE_SW_WIRED) != 0) pmap->pm_stats.wired_count++; pmap_resident_count_inc(pmap, 1); } /* * Enter on the PV list if part of our managed memory. */ if ((m->oflags & VPO_UNMANAGED) == 0) { new_l3 |= PTE_SW_MANAGED; pv = get_pv_entry(pmap, &lock); pv->pv_va = va; CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; if (pmap_is_write(new_l3)) vm_page_aflag_set(m, PGA_WRITEABLE); } /* * Update the L3 entry. */ if (orig_l3 != 0) { validate: orig_l3 = pmap_load_store(l3, new_l3); PTE_SYNC(l3); opa = PTE_TO_PHYS(orig_l3); if (opa != pa) { if ((orig_l3 & PTE_SW_MANAGED) != 0) { om = PHYS_TO_VM_PAGE(opa); if (pmap_page_dirty(orig_l3)) vm_page_dirty(om); - if ((orig_l3 & PTE_REF) != 0) + if ((orig_l3 & PTE_A) != 0) vm_page_aflag_set(om, PGA_REFERENCED); CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa); pmap_pvh_free(&om->md, pmap, va); } } else if (pmap_page_dirty(orig_l3)) { if ((orig_l3 & PTE_SW_MANAGED) != 0) vm_page_dirty(m); } } else { pmap_load_store(l3, new_l3); PTE_SYNC(l3); } pmap_invalidate_page(pmap, va); if ((pmap != pmap_kernel()) && (pmap == &curproc->p_vmspace->vm_pmap)) cpu_icache_sync_range(va, PAGE_SIZE); if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); return (KERN_SUCCESS); } /* * Maps a sequence of resident pages belonging to the same object. * The sequence begins with the given page m_start. This page is * mapped at the given virtual address start. Each subsequent page is * mapped at a virtual address that is offset from start by the same * amount as the page is offset from m_start within the object. The * last page in the sequence is the page with the largest offset from * m_start that can be mapped at a virtual address less than the given * virtual address end. Not every virtual page between start and end * is mapped; only those for which a resident page exists with the * corresponding offset from m_start are mapped. */ void pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, vm_page_t m_start, vm_prot_t prot) { struct rwlock *lock; vm_offset_t va; vm_page_t m, mpte; vm_pindex_t diff, psize; VM_OBJECT_ASSERT_LOCKED(m_start->object); psize = atop(end - start); mpte = NULL; m = m_start; lock = NULL; rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { va = start + ptoa(diff); mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, &lock); m = TAILQ_NEXT(m, listq); } if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } /* * this code makes some *MAJOR* assumptions: * 1. Current pmap & pmap exists. * 2. Not wired. * 3. Read access. * 4. No page table pages. * but is *MUCH* faster than pmap_enter... */ void pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) { struct rwlock *lock; lock = NULL; rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock); if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp) { struct spglist free; vm_paddr_t phys; pd_entry_t *l2; pt_entry_t *l3; vm_paddr_t pa; pt_entry_t entry; pn_t pn; KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || (m->oflags & VPO_UNMANAGED) != 0, ("pmap_enter_quick_locked: managed mapping within the clean submap")); rw_assert(&pvh_global_lock, RA_LOCKED); PMAP_LOCK_ASSERT(pmap, MA_OWNED); CTR2(KTR_PMAP, "pmap_enter_quick_locked: %p %lx", pmap, va); /* * In the case that a page table page is not * resident, we are creating it here. */ if (va < VM_MAXUSER_ADDRESS) { vm_pindex_t l2pindex; /* * Calculate pagetable page index */ l2pindex = pmap_l2_pindex(va); if (mpte && (mpte->pindex == l2pindex)) { mpte->wire_count++; } else { /* * Get the l2 entry */ l2 = pmap_l2(pmap, va); /* * If the page table page is mapped, we just increment * the hold count, and activate it. Otherwise, we * attempt to allocate a page table page. If this * attempt fails, we don't retry. Instead, we give up. */ if (l2 != NULL && pmap_load(l2) != 0) { phys = PTE_TO_PHYS(pmap_load(l2)); mpte = PHYS_TO_VM_PAGE(phys); mpte->wire_count++; } else { /* * Pass NULL instead of the PV list lock * pointer, because we don't intend to sleep. */ mpte = _pmap_alloc_l3(pmap, l2pindex, NULL); if (mpte == NULL) return (mpte); } } l3 = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte)); l3 = &l3[pmap_l3_index(va)]; } else { mpte = NULL; l3 = pmap_l3(kernel_pmap, va); } if (l3 == NULL) panic("pmap_enter_quick_locked: No l3"); if (pmap_load(l3) != 0) { if (mpte != NULL) { mpte->wire_count--; mpte = NULL; } return (mpte); } /* * Enter on the PV list if part of our managed memory. */ if ((m->oflags & VPO_UNMANAGED) == 0 && !pmap_try_insert_pv_entry(pmap, va, m, lockp)) { if (mpte != NULL) { SLIST_INIT(&free); if (pmap_unwire_l3(pmap, va, mpte, &free)) { pmap_invalidate_page(pmap, va); pmap_free_zero_pages(&free); } mpte = NULL; } return (mpte); } /* * Increment counters */ pmap_resident_count_inc(pmap, 1); pa = VM_PAGE_TO_PHYS(m); pn = (pa / PAGE_SIZE); /* RISCVTODO: check permissions */ - entry = (PTE_VALID | (PTE_TYPE_SRWX << PTE_TYPE_S)); + entry = (PTE_V | PTE_RWX); entry |= (pn << PTE_PPN0_S); /* * Now validate mapping with RO protection */ if ((m->oflags & VPO_UNMANAGED) == 0) entry |= PTE_SW_MANAGED; pmap_load_store(l3, entry); PTE_SYNC(l3); pmap_invalidate_page(pmap, va); return (mpte); } /* * This code maps large physical mmap regions into the * processor address space. Note that some shortcuts * are taken, but the code works. */ void pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, vm_pindex_t pindex, vm_size_t size) { VM_OBJECT_ASSERT_WLOCKED(object); KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, ("pmap_object_init_pt: non-device object")); } /* * Clear the wired attribute from the mappings for the specified range of * addresses in the given pmap. Every valid mapping within that range * must have the wired attribute set. In contrast, invalid mappings * cannot have the wired attribute set, so they are ignored. * * The wired attribute of the page table entry is not a hardware feature, * so there is no need to invalidate any TLB entries. */ void pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t va_next; pd_entry_t *l1, *l2; pt_entry_t *l3; boolean_t pv_lists_locked; pv_lists_locked = FALSE; PMAP_LOCK(pmap); for (; sva < eva; sva = va_next) { l1 = pmap_l1(pmap, sva); if (pmap_load(l1) == 0) { va_next = (sva + L1_SIZE) & ~L1_OFFSET; if (va_next < sva) va_next = eva; continue; } va_next = (sva + L2_SIZE) & ~L2_OFFSET; if (va_next < sva) va_next = eva; l2 = pmap_l1_to_l2(l1, sva); if (pmap_load(l2) == 0) continue; if (va_next > eva) va_next = eva; for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, sva += L3_SIZE) { if (pmap_load(l3) == 0) continue; if ((pmap_load(l3) & PTE_SW_WIRED) == 0) panic("pmap_unwire: l3 %#jx is missing " "PTE_SW_WIRED", (uintmax_t)*l3); /* * PG_W must be cleared atomically. Although the pmap * lock synchronizes access to PG_W, another processor * could be setting PG_M and/or PG_A concurrently. */ atomic_clear_long(l3, PTE_SW_WIRED); pmap->pm_stats.wired_count--; } } if (pv_lists_locked) rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); } /* * Copy the range specified by src_addr/len * from the source map to the range dst_addr/len * in the destination map. * * This routine is only advisory and need not do anything. */ void pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, vm_offset_t src_addr) { } /* * pmap_zero_page zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. */ void pmap_zero_page(vm_page_t m) { vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); pagezero((void *)va); } /* * pmap_zero_page_area zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. * * off and size may not cover an area beyond a single hardware page. */ void pmap_zero_page_area(vm_page_t m, int off, int size) { vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); if (off == 0 && size == PAGE_SIZE) pagezero((void *)va); else bzero((char *)va + off, size); } /* * pmap_zero_page_idle zeros the specified hardware page by mapping * the page into KVM and using bzero to clear its contents. This * is intended to be called from the vm_pagezero process only and * outside of Giant. */ void pmap_zero_page_idle(vm_page_t m) { vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); pagezero((void *)va); } /* * pmap_copy_page copies the specified (machine independent) * page by mapping the page into virtual memory and using * bcopy to copy the page, one machine dependent page at a * time. */ void pmap_copy_page(vm_page_t msrc, vm_page_t mdst) { vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc)); vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst)); pagecopy((void *)src, (void *)dst); } int unmapped_buf_allowed = 1; void pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], vm_offset_t b_offset, int xfersize) { void *a_cp, *b_cp; vm_page_t m_a, m_b; vm_paddr_t p_a, p_b; vm_offset_t a_pg_offset, b_pg_offset; int cnt; while (xfersize > 0) { a_pg_offset = a_offset & PAGE_MASK; m_a = ma[a_offset >> PAGE_SHIFT]; p_a = m_a->phys_addr; b_pg_offset = b_offset & PAGE_MASK; m_b = mb[b_offset >> PAGE_SHIFT]; p_b = m_b->phys_addr; cnt = min(xfersize, PAGE_SIZE - a_pg_offset); cnt = min(cnt, PAGE_SIZE - b_pg_offset); if (__predict_false(!PHYS_IN_DMAP(p_a))) { panic("!DMAP a %lx", p_a); } else { a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset; } if (__predict_false(!PHYS_IN_DMAP(p_b))) { panic("!DMAP b %lx", p_b); } else { b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset; } bcopy(a_cp, b_cp, cnt); a_offset += cnt; b_offset += cnt; xfersize -= cnt; } } vm_offset_t pmap_quick_enter_page(vm_page_t m) { return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m))); } void pmap_quick_remove_page(vm_offset_t addr) { } /* * Returns true if the pmap's pv is one of the first * 16 pvs linked to from this page. This count may * be changed upwards or downwards in the future; it * is only necessary that true be returned for a small * subset of pmaps for proper page aging. */ boolean_t pmap_page_exists_quick(pmap_t pmap, vm_page_t m) { struct rwlock *lock; pv_entry_t pv; int loops = 0; boolean_t rv; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_page_exists_quick: page %p is not managed", m)); rv = FALSE; rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { if (PV_PMAP(pv) == pmap) { rv = TRUE; break; } loops++; if (loops >= 16) break; } rw_runlock(lock); rw_runlock(&pvh_global_lock); return (rv); } /* * pmap_page_wired_mappings: * * Return the number of managed mappings to the given physical page * that are wired. */ int pmap_page_wired_mappings(vm_page_t m) { struct rwlock *lock; pmap_t pmap; pt_entry_t *l3; pv_entry_t pv; int count, md_gen; if ((m->oflags & VPO_UNMANAGED) != 0) return (0); rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); restart: count = 0; TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; rw_runlock(lock); PMAP_LOCK(pmap); rw_rlock(lock); if (md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } l3 = pmap_l3(pmap, pv->pv_va); if (l3 != NULL && (pmap_load(l3) & PTE_SW_WIRED) != 0) count++; PMAP_UNLOCK(pmap); } rw_runlock(lock); rw_runlock(&pvh_global_lock); return (count); } /* * Destroy all managed, non-wired mappings in the given user-space * pmap. This pmap cannot be active on any processor besides the * caller. * * This function cannot be applied to the kernel pmap. Moreover, it * is not intended for general use. It is only to be used during * process termination. Consequently, it can be implemented in ways * that make it faster than pmap_remove(). First, it can more quickly * destroy mappings by iterating over the pmap's collection of PV * entries, rather than searching the page table. Second, it doesn't * have to test and clear the page table entries atomically, because * no processor is currently accessing the user address space. In * particular, a page table entry's dirty bit won't change state once * this function starts. */ void pmap_remove_pages(pmap_t pmap) { pd_entry_t ptepde, *l2; pt_entry_t *l3, tl3; struct spglist free; vm_page_t m; pv_entry_t pv; struct pv_chunk *pc, *npc; struct rwlock *lock; int64_t bit; uint64_t inuse, bitmask; int allfree, field, freed, idx; vm_paddr_t pa; lock = NULL; SLIST_INIT(&free); rw_rlock(&pvh_global_lock); PMAP_LOCK(pmap); TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { allfree = 1; freed = 0; for (field = 0; field < _NPCM; field++) { inuse = ~pc->pc_map[field] & pc_freemask[field]; while (inuse != 0) { bit = ffsl(inuse) - 1; bitmask = 1UL << bit; idx = field * 64 + bit; pv = &pc->pc_pventry[idx]; inuse &= ~bitmask; l2 = pmap_l2(pmap, pv->pv_va); ptepde = pmap_load(l2); l3 = pmap_l2_to_l3(l2, pv->pv_va); tl3 = pmap_load(l3); /* * We cannot remove wired pages from a process' mapping at this time */ if (tl3 & PTE_SW_WIRED) { allfree = 0; continue; } pa = PTE_TO_PHYS(tl3); m = PHYS_TO_VM_PAGE(pa); KASSERT(m->phys_addr == pa, ("vm_page_t %p phys_addr mismatch %016jx %016jx", m, (uintmax_t)m->phys_addr, (uintmax_t)tl3)); KASSERT((m->flags & PG_FICTITIOUS) != 0 || m < &vm_page_array[vm_page_array_size], ("pmap_remove_pages: bad l3 %#jx", (uintmax_t)tl3)); if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(pmap_load(l3))) cpu_dcache_wb_range(pv->pv_va, L3_SIZE); pmap_load_clear(l3); PTE_SYNC(l3); pmap_invalidate_page(pmap, pv->pv_va); /* * Update the vm_page_t clean/reference bits. */ if (pmap_page_dirty(tl3)) vm_page_dirty(m); CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m); /* Mark free */ pc->pc_map[field] |= bitmask; pmap_resident_count_dec(pmap, 1); TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; pmap_unuse_l3(pmap, pv->pv_va, ptepde, &free); freed++; } } PV_STAT(atomic_add_long(&pv_entry_frees, freed)); PV_STAT(atomic_add_int(&pv_entry_spare, freed)); PV_STAT(atomic_subtract_long(&pv_entry_count, freed)); if (allfree) { TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); free_pv_chunk(pc); } } pmap_invalidate_all(pmap); if (lock != NULL) rw_wunlock(lock); rw_runlock(&pvh_global_lock); PMAP_UNLOCK(pmap); pmap_free_zero_pages(&free); } /* * This is used to check if a page has been accessed or modified. As we * don't have a bit to see if it has been modified we have to assume it * has been if the page is read/write. */ static boolean_t pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified) { struct rwlock *lock; pv_entry_t pv; pt_entry_t *l3, mask, value; pmap_t pmap; int md_gen; boolean_t rv; rv = FALSE; rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); rw_rlock(lock); restart: TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; rw_runlock(lock); PMAP_LOCK(pmap); rw_rlock(lock); if (md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto restart; } } l3 = pmap_l3(pmap, pv->pv_va); mask = 0; value = 0; if (modified) { - mask |= PTE_DIRTY; - value |= PTE_DIRTY; + mask |= PTE_D; + value |= PTE_D; } if (accessed) { - mask |= PTE_REF; - value |= PTE_REF; + mask |= PTE_A; + value |= PTE_A; } #if 0 if (modified) { mask |= ATTR_AP_RW_BIT; value |= ATTR_AP(ATTR_AP_RW); } if (accessed) { mask |= ATTR_AF | ATTR_DESCR_MASK; value |= ATTR_AF | L3_PAGE; } #endif rv = (pmap_load(l3) & mask) == value; PMAP_UNLOCK(pmap); if (rv) goto out; } out: rw_runlock(lock); rw_runlock(&pvh_global_lock); return (rv); } /* * pmap_is_modified: * * Return whether or not the specified physical page was modified * in any physical maps. */ boolean_t pmap_is_modified(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_is_modified: page %p is not managed", m)); /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * concurrently set while the object is locked. Thus, if PGA_WRITEABLE * is clear, no PTEs can have PG_M set. */ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return (FALSE); return (pmap_page_test_mappings(m, FALSE, TRUE)); } /* * pmap_is_prefaultable: * * Return whether or not the specified virtual address is eligible * for prefault. */ boolean_t pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) { pt_entry_t *l3; boolean_t rv; rv = FALSE; PMAP_LOCK(pmap); l3 = pmap_l3(pmap, addr); if (l3 != NULL && pmap_load(l3) != 0) { rv = TRUE; } PMAP_UNLOCK(pmap); return (rv); } /* * pmap_is_referenced: * * Return whether or not the specified physical page was referenced * in any physical maps. */ boolean_t pmap_is_referenced(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_is_referenced: page %p is not managed", m)); return (pmap_page_test_mappings(m, TRUE, FALSE)); } /* * Clear the write and modified bits in each of the given page's mappings. */ void pmap_remove_write(vm_page_t m) { pmap_t pmap; struct rwlock *lock; pv_entry_t pv; pt_entry_t *l3, oldl3; pt_entry_t newl3; int md_gen; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_remove_write: page %p is not managed", m)); /* * If the page is not exclusive busied, then PGA_WRITEABLE cannot be * set by another thread while the object is locked. Thus, * if PGA_WRITEABLE is clear, no page table entries need updating. */ VM_OBJECT_ASSERT_WLOCKED(m->object); if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) return; rw_rlock(&pvh_global_lock); lock = VM_PAGE_TO_PV_LIST_LOCK(m); retry_pv_loop: rw_wlock(lock); TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); rw_wunlock(lock); goto retry_pv_loop; } } l3 = pmap_l3(pmap, pv->pv_va); retry: oldl3 = pmap_load(l3); if (pmap_is_write(oldl3)) { - newl3 = oldl3 & ~(1 << PTE_TYPE_S); + newl3 = oldl3 & ~(PTE_W); if (!atomic_cmpset_long(l3, oldl3, newl3)) goto retry; /* TODO: use pmap_page_dirty(oldl3) ? */ - if ((oldl3 & PTE_REF) != 0) + if ((oldl3 & PTE_A) != 0) vm_page_dirty(m); pmap_invalidate_page(pmap, pv->pv_va); } PMAP_UNLOCK(pmap); } rw_wunlock(lock); vm_page_aflag_clear(m, PGA_WRITEABLE); rw_runlock(&pvh_global_lock); } static __inline boolean_t safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte) { return (FALSE); } #define PMAP_TS_REFERENCED_MAX 5 /* * pmap_ts_referenced: * * Return a count of reference bits for a page, clearing those bits. * It is not necessary for every reference bit to be cleared, but it * is necessary that 0 only be returned when there are truly no * reference bits set. * * XXX: The exact number of bits to check and clear is a matter that * should be tested and standardized at some point in the future for * optimal aging of shared pages. */ int pmap_ts_referenced(vm_page_t m) { pv_entry_t pv, pvf; pmap_t pmap; struct rwlock *lock; pd_entry_t *l2; pt_entry_t *l3; vm_paddr_t pa; int cleared, md_gen, not_cleared; struct spglist free; KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_ts_referenced: page %p is not managed", m)); SLIST_INIT(&free); cleared = 0; pa = VM_PAGE_TO_PHYS(m); lock = PHYS_TO_PV_LIST_LOCK(pa); rw_rlock(&pvh_global_lock); rw_wlock(lock); retry: not_cleared = 0; if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL) goto out; pv = pvf; do { if (pvf == NULL) pvf = pv; pmap = PV_PMAP(pv); if (!PMAP_TRYLOCK(pmap)) { md_gen = m->md.pv_gen; rw_wunlock(lock); PMAP_LOCK(pmap); rw_wlock(lock); if (md_gen != m->md.pv_gen) { PMAP_UNLOCK(pmap); goto retry; } } l2 = pmap_l2(pmap, pv->pv_va); - KASSERT((pmap_load(l2) & PTE_TYPE_M) == (PTE_TYPE_PTR << PTE_TYPE_S), + KASSERT((pmap_load(l2) & PTE_RX) == 0, ("pmap_ts_referenced: found an invalid l2 table")); l3 = pmap_l2_to_l3(l2, pv->pv_va); - if ((pmap_load(l3) & PTE_REF) != 0) { + if ((pmap_load(l3) & PTE_A) != 0) { if (safe_to_clear_referenced(pmap, pmap_load(l3))) { /* * TODO: We don't handle the access flag * at all. We need to be able to set it in * the exception handler. */ panic("RISCVTODO: safe_to_clear_referenced\n"); } else if ((pmap_load(l3) & PTE_SW_WIRED) == 0) { /* * Wired pages cannot be paged out so * doing accessed bit emulation for * them is wasted effort. We do the * hard work for unwired pages only. */ pmap_remove_l3(pmap, l3, pv->pv_va, pmap_load(l2), &free, &lock); pmap_invalidate_page(pmap, pv->pv_va); cleared++; if (pvf == pv) pvf = NULL; pv = NULL; KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), ("inconsistent pv lock %p %p for page %p", lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); } else not_cleared++; } PMAP_UNLOCK(pmap); /* Rotate the PV list if it has more than one entry. */ if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) { TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); m->md.pv_gen++; } } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared + not_cleared < PMAP_TS_REFERENCED_MAX); out: rw_wunlock(lock); rw_runlock(&pvh_global_lock); pmap_free_zero_pages(&free); return (cleared + not_cleared); } /* * Apply the given advice to the specified range of addresses within the * given pmap. Depending on the advice, clear the referenced and/or * modified flags in each mapping and set the mapped page's dirty field. */ void pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) { } /* * Clear the modify bits on the specified physical page. */ void pmap_clear_modify(vm_page_t m) { KASSERT((m->oflags & VPO_UNMANAGED) == 0, ("pmap_clear_modify: page %p is not managed", m)); VM_OBJECT_ASSERT_WLOCKED(m->object); KASSERT(!vm_page_xbusied(m), ("pmap_clear_modify: page %p is exclusive busied", m)); /* * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set. * If the object containing the page is locked and the page is not * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. */ if ((m->aflags & PGA_WRITEABLE) == 0) return; /* RISCVTODO: We lack support for tracking if a page is modified */ } void * pmap_mapbios(vm_paddr_t pa, vm_size_t size) { return ((void *)PHYS_TO_DMAP(pa)); } void pmap_unmapbios(vm_paddr_t pa, vm_size_t size) { } /* * Sets the memory attribute for the specified page. */ void pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) { m->md.pv_memattr = ma; /* * RISCVTODO: Implement the below (from the amd64 pmap) * If "m" is a normal page, update its direct mapping. This update * can be relied upon to perform any cache operations that are * required for data coherence. */ if ((m->flags & PG_FICTITIOUS) == 0 && PHYS_IN_DMAP(VM_PAGE_TO_PHYS(m))) panic("RISCVTODO: pmap_page_set_memattr"); } /* * perform the pmap work for mincore */ int pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) { panic("RISCVTODO: pmap_mincore"); } void pmap_activate(struct thread *td) { pmap_t pmap; critical_enter(); pmap = vmspace_pmap(td->td_proc->p_vmspace); td->td_pcb->pcb_l1addr = vtophys(pmap->pm_l1); - __asm __volatile("csrw sptbr, %0" :: "r"(td->td_pcb->pcb_l1addr)); + __asm __volatile("csrw sptbr, %0" :: "r"(td->td_pcb->pcb_l1addr >> PAGE_SHIFT)); pmap_invalidate_all(pmap); critical_exit(); } void pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz) { panic("RISCVTODO: pmap_sync_icache"); } /* * Increase the starting virtual address of the given mapping if a * different alignment might result in more superpage mappings. */ void pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, vm_offset_t *addr, vm_size_t size) { } /** * Get the kernel virtual address of a set of physical pages. If there are * physical addresses not covered by the DMAP perform a transient mapping * that will be removed when calling pmap_unmap_io_transient. * * \param page The pages the caller wishes to obtain the virtual * address on the kernel memory map. * \param vaddr On return contains the kernel virtual memory address * of the pages passed in the page parameter. * \param count Number of pages passed in. * \param can_fault TRUE if the thread using the mapped pages can take * page faults, FALSE otherwise. * * \returns TRUE if the caller must call pmap_unmap_io_transient when * finished or FALSE otherwise. * */ boolean_t pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, boolean_t can_fault) { vm_paddr_t paddr; boolean_t needs_mapping; int error, i; /* * Allocate any KVA space that we need, this is done in a separate * loop to prevent calling vmem_alloc while pinned. */ needs_mapping = FALSE; for (i = 0; i < count; i++) { paddr = VM_PAGE_TO_PHYS(page[i]); if (__predict_false(paddr >= DMAP_MAX_PHYSADDR)) { error = vmem_alloc(kernel_arena, PAGE_SIZE, M_BESTFIT | M_WAITOK, &vaddr[i]); KASSERT(error == 0, ("vmem_alloc failed: %d", error)); needs_mapping = TRUE; } else { vaddr[i] = PHYS_TO_DMAP(paddr); } } /* Exit early if everything is covered by the DMAP */ if (!needs_mapping) return (FALSE); if (!can_fault) sched_pin(); for (i = 0; i < count; i++) { paddr = VM_PAGE_TO_PHYS(page[i]); if (paddr >= DMAP_MAX_PHYSADDR) { panic( "pmap_map_io_transient: TODO: Map out of DMAP data"); } } return (needs_mapping); } void pmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, boolean_t can_fault) { vm_paddr_t paddr; int i; if (!can_fault) sched_unpin(); for (i = 0; i < count; i++) { paddr = VM_PAGE_TO_PHYS(page[i]); if (paddr >= DMAP_MAX_PHYSADDR) { panic("RISCVTODO: pmap_unmap_io_transient: Unmap data"); } } } Index: head/sys/riscv/riscv/swtch.S =================================================================== --- head/sys/riscv/riscv/swtch.S (revision 303659) +++ head/sys/riscv/riscv/swtch.S (revision 303660) @@ -1,274 +1,277 @@ /*- * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include "assym.s" #include "opt_sched.h" #include #include #include #include __FBSDID("$FreeBSD$"); /* * void cpu_throw(struct thread *old, struct thread *new) */ ENTRY(cpu_throw) /* Store the new curthread */ sd a1, PC_CURTHREAD(gp) /* And the new pcb */ ld x13, TD_PCB(a1) sd x13, PC_CURPCB(gp) sfence.vm /* Switch to the new pmap */ ld t0, PCB_L1ADDR(x13) + srli t0, t0, PAGE_SHIFT csrw sptbr, t0 /* TODO: Invalidate the TLB */ sfence.vm /* Load registers */ ld ra, (PCB_RA)(x13) ld sp, (PCB_SP)(x13) /* s[0-11] */ ld s0, (PCB_S + 0 * 8)(x13) ld s1, (PCB_S + 1 * 8)(x13) ld s2, (PCB_S + 2 * 8)(x13) ld s3, (PCB_S + 3 * 8)(x13) ld s4, (PCB_S + 4 * 8)(x13) ld s5, (PCB_S + 5 * 8)(x13) ld s6, (PCB_S + 6 * 8)(x13) ld s7, (PCB_S + 7 * 8)(x13) ld s8, (PCB_S + 8 * 8)(x13) ld s9, (PCB_S + 9 * 8)(x13) ld s10, (PCB_S + 10 * 8)(x13) ld s11, (PCB_S + 11 * 8)(x13) + ret .Lcpu_throw_panic_str: .asciz "cpu_throw: %p\0" END(cpu_throw) /* * void cpu_switch(struct thread *old, struct thread *new, struct mtx *mtx) * * a0 = old * a1 = new * a2 = mtx * x3 to x7, x16 and x17 are caller saved */ ENTRY(cpu_switch) /* Store the new curthread */ sd a1, PC_CURTHREAD(gp) /* And the new pcb */ ld x13, TD_PCB(a1) sd x13, PC_CURPCB(gp) /* Save the old context. */ ld x13, TD_PCB(a0) /* Store ra, sp and the callee-saved registers */ sd ra, (PCB_RA)(x13) sd sp, (PCB_SP)(x13) /* s[0-11] */ sd s0, (PCB_S + 0 * 8)(x13) sd s1, (PCB_S + 1 * 8)(x13) sd s2, (PCB_S + 2 * 8)(x13) sd s3, (PCB_S + 3 * 8)(x13) sd s4, (PCB_S + 4 * 8)(x13) sd s5, (PCB_S + 5 * 8)(x13) sd s6, (PCB_S + 6 * 8)(x13) sd s7, (PCB_S + 7 * 8)(x13) sd s8, (PCB_S + 8 * 8)(x13) sd s9, (PCB_S + 9 * 8)(x13) sd s10, (PCB_S + 10 * 8)(x13) sd s11, (PCB_S + 11 * 8)(x13) /* * Restore the saved context. */ ld x13, TD_PCB(a1) /* * TODO: We may need to flush the cache here if switching * to a user process. */ sfence.vm /* Switch to the new pmap */ ld t0, PCB_L1ADDR(x13) + srli t0, t0, PAGE_SHIFT csrw sptbr, t0 /* TODO: Invalidate the TLB */ sfence.vm /* Release the old thread */ sd a2, TD_LOCK(a0) #if defined(SCHED_ULE) && defined(SMP) /* Spin if TD_LOCK points to a blocked_lock */ la a2, _C_LABEL(blocked_lock) 1: ld t0, TD_LOCK(a1) beq t0, a2, 1b #endif /* Restore the registers */ ld ra, (PCB_RA)(x13) ld sp, (PCB_SP)(x13) /* s[0-11] */ ld s0, (PCB_S + 0 * 8)(x13) ld s1, (PCB_S + 1 * 8)(x13) ld s2, (PCB_S + 2 * 8)(x13) ld s3, (PCB_S + 3 * 8)(x13) ld s4, (PCB_S + 4 * 8)(x13) ld s5, (PCB_S + 5 * 8)(x13) ld s6, (PCB_S + 6 * 8)(x13) ld s7, (PCB_S + 7 * 8)(x13) ld s8, (PCB_S + 8 * 8)(x13) ld s9, (PCB_S + 9 * 8)(x13) ld s10, (PCB_S + 10 * 8)(x13) ld s11, (PCB_S + 11 * 8)(x13) ret .Lcpu_switch_panic_str: .asciz "cpu_switch: %p\0" END(cpu_switch) /* * fork_exit(void (*callout)(void *, struct trapframe *), void *arg, * struct trapframe *frame) */ ENTRY(fork_trampoline) mv a0, s0 mv a1, s1 mv a2, sp call _C_LABEL(fork_exit) /* Restore sstatus */ ld t0, (TF_SSTATUS)(sp) /* Ensure interrupts disabled */ - li t1, ~SSTATUS_IE + li t1, ~SSTATUS_SIE and t0, t0, t1 csrw sstatus, t0 /* Restore exception program counter */ ld t0, (TF_SEPC)(sp) csrw sepc, t0 /* Restore the registers */ ld t0, (TF_T + 0 * 8)(sp) ld t1, (TF_T + 1 * 8)(sp) ld t2, (TF_T + 2 * 8)(sp) ld t3, (TF_T + 3 * 8)(sp) ld t4, (TF_T + 4 * 8)(sp) ld t5, (TF_T + 5 * 8)(sp) ld t6, (TF_T + 6 * 8)(sp) ld s0, (TF_S + 0 * 8)(sp) ld s1, (TF_S + 1 * 8)(sp) ld s2, (TF_S + 2 * 8)(sp) ld s3, (TF_S + 3 * 8)(sp) ld s4, (TF_S + 4 * 8)(sp) ld s5, (TF_S + 5 * 8)(sp) ld s6, (TF_S + 6 * 8)(sp) ld s7, (TF_S + 7 * 8)(sp) ld s8, (TF_S + 8 * 8)(sp) ld s9, (TF_S + 9 * 8)(sp) ld s10, (TF_S + 10 * 8)(sp) ld s11, (TF_S + 11 * 8)(sp) ld a0, (TF_A + 0 * 8)(sp) ld a1, (TF_A + 1 * 8)(sp) ld a2, (TF_A + 2 * 8)(sp) ld a3, (TF_A + 3 * 8)(sp) ld a4, (TF_A + 4 * 8)(sp) ld a5, (TF_A + 5 * 8)(sp) ld a6, (TF_A + 6 * 8)(sp) ld a7, (TF_A + 7 * 8)(sp) /* Load user ra and sp */ ld tp, (TF_TP)(sp) ld ra, (TF_RA)(sp) /* * Store our pcpup on stack, we will load it back * on kernel mode trap. */ sd gp, (TF_SIZE)(sp) ld gp, (TF_GP)(sp) /* Save kernel stack so we can use it doing a user trap */ addi sp, sp, TF_SIZE csrw sscratch, sp /* Load user stack */ ld sp, (TF_SP - TF_SIZE)(sp) - eret + sret END(fork_trampoline) ENTRY(savectx) /* Store ra, sp and the callee-saved registers */ sd ra, (PCB_RA)(a0) sd sp, (PCB_SP)(a0) /* s[0-11] */ sd s0, (PCB_S + 0 * 8)(a0) sd s1, (PCB_S + 1 * 8)(a0) sd s2, (PCB_S + 2 * 8)(a0) sd s3, (PCB_S + 3 * 8)(a0) sd s4, (PCB_S + 4 * 8)(a0) sd s5, (PCB_S + 5 * 8)(a0) sd s6, (PCB_S + 6 * 8)(a0) sd s7, (PCB_S + 7 * 8)(a0) sd s8, (PCB_S + 8 * 8)(a0) sd s9, (PCB_S + 9 * 8)(a0) sd s10, (PCB_S + 10 * 8)(a0) sd s11, (PCB_S + 11 * 8)(a0) /* Store the VFP registers */ #ifdef VFP /* TODO */ #endif ret END(savectx) Index: head/sys/riscv/riscv/timer.c =================================================================== --- head/sys/riscv/riscv/timer.c (revision 303659) +++ head/sys/riscv/riscv/timer.c (revision 303660) @@ -1,299 +1,318 @@ /*- - * Copyright (c) 2015 Ruslan Bukin + * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ /* * RISC-V Timer */ #include "opt_platform.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define DEFAULT_FREQ 1000000 +#define TIMER_COUNTS 0x00 +#define TIMER_MTIMECMP(cpu) (0x08 + (cpu * 8)) + +#define READ8(_sc, _reg) \ + bus_space_read_8(_sc->bst, _sc->bsh, _reg) +#define WRITE8(_sc, _reg, _val) \ + bus_space_write_8(_sc->bst, _sc->bsh, _reg, _val) + struct riscv_tmr_softc { - struct resource *res[1]; - void *ihl[1]; + struct resource *res[2]; + bus_space_tag_t bst; + bus_space_handle_t bsh; + void *ih; uint32_t clkfreq; struct eventtimer et; }; static struct riscv_tmr_softc *riscv_tmr_sc = NULL; static struct resource_spec timer_spec[] = { + { SYS_RES_MEMORY, 0, RF_ACTIVE }, { SYS_RES_IRQ, 0, RF_ACTIVE }, { -1, 0 } }; static timecounter_get_t riscv_tmr_get_timecount; static struct timecounter riscv_tmr_timecount = { .tc_name = "RISC-V Timecounter", .tc_get_timecount = riscv_tmr_get_timecount, .tc_poll_pps = NULL, .tc_counter_mask = ~0u, .tc_frequency = 0, .tc_quality = 1000, }; static long -get_counts(void) +get_counts(struct riscv_tmr_softc *sc) { - return (csr_read(stime)); + return (READ8(sc, TIMER_COUNTS)); } static unsigned riscv_tmr_get_timecount(struct timecounter *tc) { + struct riscv_tmr_softc *sc; - return (get_counts()); + sc = tc->tc_priv; + + return (get_counts(sc)); } static int riscv_tmr_start(struct eventtimer *et, sbintime_t first, sbintime_t period) { struct riscv_tmr_softc *sc; - int counts; + uint64_t counts; + int cpu; sc = (struct riscv_tmr_softc *)et->et_priv; if (first != 0) { counts = ((uint32_t)et->et_frequency * first) >> 32; + counts += READ8(sc, TIMER_COUNTS); + cpu = PCPU_GET(cpuid); + WRITE8(sc, TIMER_MTIMECMP(cpu), counts); + csr_set(sie, SIE_STIE); machine_command(ECALL_MTIMECMP, counts); + return (0); } return (EINVAL); } static int riscv_tmr_stop(struct eventtimer *et) { struct riscv_tmr_softc *sc; sc = (struct riscv_tmr_softc *)et->et_priv; /* TODO */ return (0); } static int riscv_tmr_intr(void *arg) { struct riscv_tmr_softc *sc; sc = (struct riscv_tmr_softc *)arg; - /* - * Clear interrupt pending bit. - * Note: SIP_STIP bit is not implemented in sip register - * in Spike simulator, so use machine command to clear - * interrupt pending bit in mip. - */ - machine_command(ECALL_CLEAR_PENDING, 0); + csr_clear(sip, SIP_STIP); if (sc->et.et_active) sc->et.et_event_cb(&sc->et, sc->et.et_arg); return (FILTER_HANDLED); } static int riscv_tmr_fdt_probe(device_t dev) { if (!ofw_bus_status_okay(dev)) return (ENXIO); if (ofw_bus_is_compatible(dev, "riscv,timer")) { device_set_desc(dev, "RISC-V Timer"); return (BUS_PROBE_DEFAULT); } return (ENXIO); } static int riscv_tmr_attach(device_t dev) { struct riscv_tmr_softc *sc; phandle_t node; pcell_t clock; int error; sc = device_get_softc(dev); if (riscv_tmr_sc) return (ENXIO); /* Get the base clock frequency */ node = ofw_bus_get_node(dev); if (node > 0) { error = OF_getprop(node, "clock-frequency", &clock, sizeof(clock)); if (error > 0) { sc->clkfreq = fdt32_to_cpu(clock); } } if (sc->clkfreq == 0) sc->clkfreq = DEFAULT_FREQ; if (sc->clkfreq == 0) { device_printf(dev, "No clock frequency specified\n"); return (ENXIO); } if (bus_alloc_resources(dev, timer_spec, sc->res)) { device_printf(dev, "could not allocate resources\n"); return (ENXIO); } + /* Memory interface */ + sc->bst = rman_get_bustag(sc->res[0]); + sc->bsh = rman_get_bushandle(sc->res[0]); + riscv_tmr_sc = sc; /* Setup IRQs handler */ - error = bus_setup_intr(dev, sc->res[0], INTR_TYPE_CLK, - riscv_tmr_intr, NULL, sc, &sc->ihl[0]); + error = bus_setup_intr(dev, sc->res[1], INTR_TYPE_CLK, + riscv_tmr_intr, NULL, sc, &sc->ih); if (error) { device_printf(dev, "Unable to alloc int resource.\n"); return (ENXIO); } riscv_tmr_timecount.tc_frequency = sc->clkfreq; + riscv_tmr_timecount.tc_priv = sc; tc_init(&riscv_tmr_timecount); sc->et.et_name = "RISC-V Eventtimer"; sc->et.et_flags = ET_FLAGS_ONESHOT | ET_FLAGS_PERCPU; sc->et.et_quality = 1000; sc->et.et_frequency = sc->clkfreq; sc->et.et_min_period = (0x00000002LLU << 32) / sc->et.et_frequency; sc->et.et_max_period = (0xfffffffeLLU << 32) / sc->et.et_frequency; sc->et.et_start = riscv_tmr_start; sc->et.et_stop = riscv_tmr_stop; sc->et.et_priv = sc; et_register(&sc->et); return (0); } static device_method_t riscv_tmr_fdt_methods[] = { DEVMETHOD(device_probe, riscv_tmr_fdt_probe), DEVMETHOD(device_attach, riscv_tmr_attach), { 0, 0 } }; static driver_t riscv_tmr_fdt_driver = { "timer", riscv_tmr_fdt_methods, sizeof(struct riscv_tmr_softc), }; static devclass_t riscv_tmr_fdt_devclass; EARLY_DRIVER_MODULE(timer, simplebus, riscv_tmr_fdt_driver, riscv_tmr_fdt_devclass, 0, 0, BUS_PASS_TIMER + BUS_PASS_ORDER_MIDDLE); EARLY_DRIVER_MODULE(timer, ofwbus, riscv_tmr_fdt_driver, riscv_tmr_fdt_devclass, 0, 0, BUS_PASS_TIMER + BUS_PASS_ORDER_MIDDLE); void DELAY(int usec) { - int32_t counts, counts_per_usec; - uint32_t first, last; + int64_t counts, counts_per_usec; + uint64_t first, last; /* * Check the timers are setup, if not just * use a for loop for the meantime */ if (riscv_tmr_sc == NULL) { for (; usec > 0; usec--) for (counts = 200; counts > 0; counts--) /* * Prevent the compiler from optimizing * out the loop */ cpufunc_nullop(); return; } /* Get the number of times to count */ counts_per_usec = ((riscv_tmr_timecount.tc_frequency / 1000000) + 1); /* * Clamp the timeout at a maximum value (about 32 seconds with * a 66MHz clock). *Nobody* should be delay()ing for anywhere * near that length of time and if they are, they should be hung * out to dry. */ if (usec >= (0x80000000U / counts_per_usec)) counts = (0x80000000U / counts_per_usec) - 1; else counts = usec * counts_per_usec; - first = get_counts(); + first = get_counts(riscv_tmr_sc); while (counts > 0) { - last = get_counts(); - counts -= (int32_t)(last - first); + last = get_counts(riscv_tmr_sc); + counts -= (int64_t)(last - first); first = last; } } Index: head/sys/riscv/riscv/trap.c =================================================================== --- head/sys/riscv/riscv/trap.c (revision 303659) +++ head/sys/riscv/riscv/trap.c (revision 303660) @@ -1,361 +1,373 @@ /*- - * Copyright (c) 2015 Ruslan Bukin + * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #ifdef KDB #include #endif #include #include #include #include #include #include #include #include #include #include #include #ifdef KDTRACE_HOOKS #include #endif int (*dtrace_invop_jump_addr)(struct trapframe *); extern register_t fsu_intr_fault; /* Called from exception.S */ void do_trap_supervisor(struct trapframe *); void do_trap_user(struct trapframe *); static __inline void call_trapsignal(struct thread *td, int sig, int code, void *addr) { ksiginfo_t ksi; ksiginfo_init_trap(&ksi); ksi.ksi_signo = sig; ksi.ksi_code = code; ksi.ksi_addr = addr; trapsignal(td, &ksi); } int cpu_fetch_syscall_args(struct thread *td, struct syscall_args *sa) { struct proc *p; register_t *ap; int nap; nap = 8; p = td->td_proc; ap = &td->td_frame->tf_a[0]; sa->code = td->td_frame->tf_t[0]; if (sa->code == SYS_syscall || sa->code == SYS___syscall) { sa->code = *ap++; nap--; } if (p->p_sysent->sv_mask) sa->code &= p->p_sysent->sv_mask; if (sa->code >= p->p_sysent->sv_size) sa->callp = &p->p_sysent->sv_table[0]; else sa->callp = &p->p_sysent->sv_table[sa->code]; sa->narg = sa->callp->sy_narg; memcpy(sa->args, ap, nap * sizeof(register_t)); if (sa->narg > nap) panic("TODO: Could we have more then 8 args?"); td->td_retval[0] = 0; td->td_retval[1] = 0; return (0); } #include "../../kern/subr_syscall.c" static void dump_regs(struct trapframe *frame) { int n; int i; n = (sizeof(frame->tf_t) / sizeof(frame->tf_t[0])); for (i = 0; i < n; i++) printf("t[%d] == 0x%016lx\n", i, frame->tf_t[i]); n = (sizeof(frame->tf_s) / sizeof(frame->tf_s[0])); for (i = 0; i < n; i++) printf("s[%d] == 0x%016lx\n", i, frame->tf_s[i]); n = (sizeof(frame->tf_a) / sizeof(frame->tf_a[0])); for (i = 0; i < n; i++) printf("a[%d] == 0x%016lx\n", i, frame->tf_a[i]); printf("sepc == 0x%016lx\n", frame->tf_sepc); printf("sstatus == 0x%016lx\n", frame->tf_sstatus); } static void svc_handler(struct trapframe *frame) { struct syscall_args sa; struct thread *td; int error; td = curthread; td->td_frame = frame; error = syscallenter(td, &sa); syscallret(td, error, &sa); } static void data_abort(struct trapframe *frame, int lower) { struct vm_map *map; uint64_t sbadaddr; struct thread *td; struct pcb *pcb; vm_prot_t ftype; vm_offset_t va; struct proc *p; int ucode; int error; int sig; #ifdef KDB if (kdb_active) { kdb_reenter(); return; } #endif td = curthread; pcb = td->td_pcb; /* * Special case for fuswintr and suswintr. These can't sleep so * handle them early on in the trap handler. */ if (__predict_false(pcb->pcb_onfault == (vm_offset_t)&fsu_intr_fault)) { frame->tf_sepc = pcb->pcb_onfault; return; } sbadaddr = frame->tf_sbadaddr; p = td->td_proc; if (lower) map = &td->td_proc->p_vmspace->vm_map; else { /* The top bit tells us which range to use */ if ((sbadaddr >> 63) == 1) map = kernel_map; else map = &td->td_proc->p_vmspace->vm_map; } va = trunc_page(sbadaddr); - if (frame->tf_scause == EXCP_STORE_ACCESS_FAULT) { + if (frame->tf_scause == EXCP_FAULT_STORE) { ftype = (VM_PROT_READ | VM_PROT_WRITE); } else { ftype = (VM_PROT_READ); } if (map != kernel_map) { /* * Keep swapout from messing with us during this * critical time. */ PROC_LOCK(p); ++p->p_lock; PROC_UNLOCK(p); /* Fault in the user page: */ error = vm_fault(map, va, ftype, VM_FAULT_NORMAL); PROC_LOCK(p); --p->p_lock; PROC_UNLOCK(p); } else { /* * Don't have to worry about process locking or stacks in the * kernel. */ error = vm_fault(map, va, ftype, VM_FAULT_NORMAL); } if (error != KERN_SUCCESS) { if (lower) { sig = SIGSEGV; if (error == KERN_PROTECTION_FAILURE) ucode = SEGV_ACCERR; else ucode = SEGV_MAPERR; call_trapsignal(td, sig, ucode, (void *)sbadaddr); } else { if (td->td_intr_nesting_level == 0 && pcb->pcb_onfault != 0) { frame->tf_a[0] = error; frame->tf_sepc = pcb->pcb_onfault; return; } dump_regs(frame); panic("vm_fault failed: %lx, va 0x%016lx", frame->tf_sepc, sbadaddr); } } if (lower) userret(td, frame); } void do_trap_supervisor(struct trapframe *frame) { uint64_t exception; + uint64_t sstatus; + /* Ensure we came from supervisor mode, interrupts disabled */ + __asm __volatile("csrr %0, sstatus" : "=&r" (sstatus)); + KASSERT((sstatus & (SSTATUS_SPP | SSTATUS_SIE)) == SSTATUS_SPP, + ("We must came from S mode with interrupts disabled")); + exception = (frame->tf_scause & EXCP_MASK); if (frame->tf_scause & EXCP_INTR) { /* Interrupt */ riscv_cpu_intr(frame); return; } #ifdef KDTRACE_HOOKS if (dtrace_trap_func != NULL && (*dtrace_trap_func)(frame, exception)) return; #endif CTR3(KTR_TRAP, "do_trap_supervisor: curthread: %p, sepc: %lx, frame: %p", curthread, frame->tf_sepc, frame); switch(exception) { - case EXCP_LOAD_ACCESS_FAULT: - case EXCP_STORE_ACCESS_FAULT: - case EXCP_INSTR_ACCESS_FAULT: + case EXCP_FAULT_LOAD: + case EXCP_FAULT_STORE: + case EXCP_FAULT_FETCH: data_abort(frame, 0); break; - case EXCP_INSTR_BREAKPOINT: + case EXCP_BREAKPOINT: #ifdef KDTRACE_HOOKS if (dtrace_invop_jump_addr != 0) { dtrace_invop_jump_addr(frame); break; } #endif #ifdef KDB kdb_trap(exception, 0, frame); #else dump_regs(frame); panic("No debugger in kernel.\n"); #endif break; - case EXCP_INSTR_ILLEGAL: + case EXCP_ILLEGAL_INSTRUCTION: dump_regs(frame); panic("Illegal instruction at 0x%016lx\n", frame->tf_sepc); break; default: dump_regs(frame); panic("Unknown kernel exception %x badaddr %lx\n", exception, frame->tf_sbadaddr); } } void do_trap_user(struct trapframe *frame) { uint64_t exception; struct thread *td; + uint64_t sstatus; td = curthread; td->td_frame = frame; + /* Ensure we came from usermode, interrupts disabled */ + __asm __volatile("csrr %0, sstatus" : "=&r" (sstatus)); + KASSERT((sstatus & (SSTATUS_SPP | SSTATUS_SIE)) == 0, + ("We must came from U mode with interrupts disabled")); + exception = (frame->tf_scause & EXCP_MASK); if (frame->tf_scause & EXCP_INTR) { /* Interrupt */ riscv_cpu_intr(frame); return; } CTR3(KTR_TRAP, "do_trap_user: curthread: %p, sepc: %lx, frame: %p", curthread, frame->tf_sepc, frame); switch(exception) { - case EXCP_LOAD_ACCESS_FAULT: - case EXCP_STORE_ACCESS_FAULT: - case EXCP_INSTR_ACCESS_FAULT: + case EXCP_FAULT_LOAD: + case EXCP_FAULT_STORE: + case EXCP_FAULT_FETCH: data_abort(frame, 1); break; - case EXCP_UMODE_ENV_CALL: + case EXCP_USER_ECALL: frame->tf_sepc += 4; /* Next instruction */ svc_handler(frame); break; - case EXCP_INSTR_ILLEGAL: + case EXCP_ILLEGAL_INSTRUCTION: call_trapsignal(td, SIGILL, ILL_ILLTRP, (void *)frame->tf_sepc); userret(td, frame); break; - case EXCP_INSTR_BREAKPOINT: + case EXCP_BREAKPOINT: call_trapsignal(td, SIGTRAP, TRAP_BRKPT, (void *)frame->tf_sepc); userret(td, frame); break; default: dump_regs(frame); panic("Unknown userland exception %x badaddr %lx\n", exception, frame->tf_sbadaddr); } } Index: head/sys/riscv/riscv/vm_machdep.c =================================================================== --- head/sys/riscv/riscv/vm_machdep.c (revision 303659) +++ head/sys/riscv/riscv/vm_machdep.c (revision 303660) @@ -1,252 +1,253 @@ /*- - * Copyright (c) 2015 Ruslan Bukin + * Copyright (c) 2015-2016 Ruslan Bukin * All rights reserved. * * Portions of this software were developed by SRI International and the * University of Cambridge Computer Laboratory under DARPA/AFRL contract * FA8750-10-C-0237 ("CTSRD"), as part of the DARPA CRASH research programme. * * Portions of this software were developed by the University of Cambridge * Computer Laboratory as part of the CTSRD Project, with support from the * UK Higher Education Innovation Fund (HEIF). * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * Finish a fork operation, with process p2 nearly set up. * Copy and update the pcb, set up the stack so that the child * ready to run and return to user mode. */ void cpu_fork(struct thread *td1, struct proc *p2, struct thread *td2, int flags) { struct pcb *pcb2; struct trapframe *tf; if ((flags & RFPROC) == 0) return; pcb2 = (struct pcb *)(td2->td_kstack + td2->td_kstack_pages * PAGE_SIZE) - 1; td2->td_pcb = pcb2; bcopy(td1->td_pcb, pcb2, sizeof(*pcb2)); td2->td_pcb->pcb_l1addr = vtophys(vmspace_pmap(td2->td_proc->p_vmspace)->pm_l1); tf = (struct trapframe *)STACKALIGN((struct trapframe *)pcb2 - 1); bcopy(td1->td_frame, tf, sizeof(*tf)); /* Clear syscall error flag */ tf->tf_t[0] = 0; /* Arguments for child */ tf->tf_a[0] = 0; tf->tf_a[1] = 0; - tf->tf_sstatus = SSTATUS_PIE; + tf->tf_sstatus = (SSTATUS_SPIE); + tf->tf_sstatus |= (MSTATUS_PRV_U << MSTATUS_SPP_SHIFT); td2->td_frame = tf; /* Set the return value registers for fork() */ td2->td_pcb->pcb_s[0] = (uintptr_t)fork_return; td2->td_pcb->pcb_s[1] = (uintptr_t)td2; td2->td_pcb->pcb_ra = (uintptr_t)fork_trampoline; td2->td_pcb->pcb_sp = (uintptr_t)td2->td_frame; /* Setup to release spin count in fork_exit(). */ td2->td_md.md_spinlock_count = 1; - td2->td_md.md_saved_sstatus_ie = 1; + td2->td_md.md_saved_sstatus_ie = (SSTATUS_SIE); } void cpu_reset(void) { printf("cpu_reset"); while(1) __asm volatile("wfi" ::: "memory"); } void cpu_thread_swapin(struct thread *td) { } void cpu_thread_swapout(struct thread *td) { } void cpu_set_syscall_retval(struct thread *td, int error) { struct trapframe *frame; frame = td->td_frame; switch (error) { case 0: frame->tf_a[0] = td->td_retval[0]; frame->tf_a[1] = td->td_retval[1]; frame->tf_t[0] = 0; /* syscall succeeded */ break; case ERESTART: frame->tf_sepc -= 4; /* prev instruction */ break; case EJUSTRETURN: break; default: frame->tf_a[0] = error; frame->tf_t[0] = 1; /* syscall error */ break; } } /* * Initialize machine state, mostly pcb and trap frame for a new * thread, about to return to userspace. Put enough state in the new * thread's PCB to get it to go back to the fork_return(), which * finalizes the thread state and handles peculiarities of the first * return to userspace for the new thread. */ void cpu_copy_thread(struct thread *td, struct thread *td0) { bcopy(td0->td_frame, td->td_frame, sizeof(struct trapframe)); bcopy(td0->td_pcb, td->td_pcb, sizeof(struct pcb)); td->td_pcb->pcb_s[0] = (uintptr_t)fork_return; td->td_pcb->pcb_s[1] = (uintptr_t)td; td->td_pcb->pcb_ra = (uintptr_t)fork_trampoline; td->td_pcb->pcb_sp = (uintptr_t)td->td_frame; /* Setup to release spin count in fork_exit(). */ td->td_md.md_spinlock_count = 1; - td->td_md.md_saved_sstatus_ie = 1; + td->td_md.md_saved_sstatus_ie = (SSTATUS_SIE); } /* * Set that machine state for performing an upcall that starts * the entry function with the given argument. */ void cpu_set_upcall(struct thread *td, void (*entry)(void *), void *arg, stack_t *stack) { struct trapframe *tf = td->td_frame; tf->tf_sp = STACKALIGN((uintptr_t)stack->ss_sp + stack->ss_size); tf->tf_sepc = (register_t)entry; tf->tf_a[0] = (register_t)arg; } int cpu_set_user_tls(struct thread *td, void *tls_base) { struct pcb *pcb; if ((uintptr_t)tls_base >= VM_MAXUSER_ADDRESS) return (EINVAL); pcb = td->td_pcb; pcb->pcb_tp = (register_t)tls_base; return (0); } void cpu_thread_exit(struct thread *td) { } void cpu_thread_alloc(struct thread *td) { td->td_pcb = (struct pcb *)(td->td_kstack + td->td_kstack_pages * PAGE_SIZE) - 1; td->td_frame = (struct trapframe *)STACKALIGN( (caddr_t)td->td_pcb - 8 - sizeof(struct trapframe)); } void cpu_thread_free(struct thread *td) { } void cpu_thread_clean(struct thread *td) { } /* * Intercept the return address from a freshly forked process that has NOT * been scheduled yet. * * This is needed to make kernel threads stay in kernel mode. */ void cpu_fork_kthread_handler(struct thread *td, void (*func)(void *), void *arg) { td->td_pcb->pcb_s[0] = (uintptr_t)func; td->td_pcb->pcb_s[1] = (uintptr_t)arg; td->td_pcb->pcb_ra = (uintptr_t)fork_trampoline; td->td_pcb->pcb_sp = (uintptr_t)td->td_frame; } void cpu_exit(struct thread *td) { } void swi_vm(void *v) { /* Nothing to do here - busdma bounce buffers are not implemented. */ }