Index: head/lib/libc/Makefile
===================================================================
--- head/lib/libc/Makefile	(revision 304284)
+++ head/lib/libc/Makefile	(revision 304285)
@@ -1,201 +1,204 @@
# @(#)Makefile	8.2 (Berkeley) 2/3/94
# $FreeBSD$

PACKAGE=	clibs
SHLIBDIR?=	/lib

.include <src.opts.mk>

# Force building of libc_pic.a
MK_TOOLCHAIN=	yes

LIBC_SRCTOP?=	${.CURDIR}

# Pick the current architecture directory for libc. In general, this is
# named MACHINE_CPUARCH, but some ABIs are different enough to require
# their own libc, so allow a directory named MACHINE_ARCH to override this.
.if exists(${LIBC_SRCTOP}/${MACHINE_ARCH})
LIBC_ARCH=${MACHINE_ARCH}
.else
LIBC_ARCH=${MACHINE_CPUARCH}
.endif

# All library objects contain FreeBSD revision strings by default; they may be
# excluded as a space-saving measure.  To produce a library that does
# not contain these strings, add -DSTRIP_FBSDID (see <sys/cdefs.h>) to CFLAGS
# below.  Note: there are no IDs for syscall stubs whose sources are generated.
# To include legacy CSRG sccsid strings, add -DLIBC_SCCS and -DSYSLIBC_SCCS
# to CFLAGS below.  -DSYSLIBC_SCCS affects just the system call stubs.
LIB=c
SHLIB_MAJOR= 7
SHLIB_LDSCRIPT=libc.ldscript
SHLIB_LDSCRIPT_LINKS=libxnet.so
WARNS?=	2
CFLAGS+=-I${LIBC_SRCTOP}/include -I${LIBC_SRCTOP}/../../include
CFLAGS+=-I${LIBC_SRCTOP}/${LIBC_ARCH}
.if ${MK_NLS} != "no"
CFLAGS+=-DNLS
.endif
CLEANFILES+=tags
INSTALL_PIC_ARCHIVE=
PRECIOUSLIB=

.ifndef NO_THREAD_STACK_UNWIND
CANCELPOINTS_CFLAGS=-fexceptions
CFLAGS+=${CANCELPOINTS_CFLAGS}
.endif

#
# Link with static libcompiler_rt.a.
#
LDFLAGS+= -nodefaultlibs
LIBADD+=	compiler_rt

.if ${MK_SSP} != "no"
LIBADD+=	ssp_nonshared
.endif

# Extras that live in either libc.a or libc_nonshared.a
LIBC_NONSHARED_SRCS=

# Define (empty) variables so that make doesn't give substitution
# errors if the included makefiles don't change these:
MDSRCS=
MISRCS=
MDASM=
MIASM=
NOASM=

.include "${LIBC_SRCTOP}/${LIBC_ARCH}/Makefile.inc"
.include "${LIBC_SRCTOP}/db/Makefile.inc"
.include "${LIBC_SRCTOP}/compat-43/Makefile.inc"
.include "${LIBC_SRCTOP}/gdtoa/Makefile.inc"
.include "${LIBC_SRCTOP}/gen/Makefile.inc"
.include "${LIBC_SRCTOP}/gmon/Makefile.inc"
.if ${MK_ICONV} != "no"
.include "${LIBC_SRCTOP}/iconv/Makefile.inc"
.endif
.include "${LIBC_SRCTOP}/inet/Makefile.inc"
.include "${LIBC_SRCTOP}/isc/Makefile.inc"
.include "${LIBC_SRCTOP}/locale/Makefile.inc"
.include "${LIBC_SRCTOP}/md/Makefile.inc"
.include "${LIBC_SRCTOP}/nameser/Makefile.inc"
.include "${LIBC_SRCTOP}/net/Makefile.inc"
.include "${LIBC_SRCTOP}/nls/Makefile.inc"
.include "${LIBC_SRCTOP}/posix1e/Makefile.inc"
.if ${LIBC_ARCH} != "aarch64" && \
    ${LIBC_ARCH} != "amd64" && \
    ${LIBC_ARCH} != "powerpc64" && \
    ${LIBC_ARCH} != "riscv" && \
    ${LIBC_ARCH} != "sparc64" && \
    ${MACHINE_ARCH:Mmipsn32*} == "" && \
    ${MACHINE_ARCH:Mmips64*} == ""
.include "${LIBC_SRCTOP}/quad/Makefile.inc"
.endif
.include "${LIBC_SRCTOP}/regex/Makefile.inc"
.include "${LIBC_SRCTOP}/resolv/Makefile.inc"
.include "${LIBC_SRCTOP}/stdio/Makefile.inc"
.include "${LIBC_SRCTOP}/stdlib/Makefile.inc"
.include "${LIBC_SRCTOP}/stdlib/jemalloc/Makefile.inc"
.include "${LIBC_SRCTOP}/stdtime/Makefile.inc"
.include "${LIBC_SRCTOP}/string/Makefile.inc"
.include "${LIBC_SRCTOP}/sys/Makefile.inc"
.include "${LIBC_SRCTOP}/secure/Makefile.inc"
.include "${LIBC_SRCTOP}/rpc/Makefile.inc"
.include "${LIBC_SRCTOP}/uuid/Makefile.inc"
.include "${LIBC_SRCTOP}/xdr/Makefile.inc"
.if (${LIBC_ARCH} == "arm" && \
    (${MACHINE_ARCH:Marmv6*} == "" || (defined(CPUTYPE) && ${CPUTYPE:M*soft*}))) || \
    ${LIBC_ARCH} == "mips"
.include "${LIBC_SRCTOP}/softfloat/Makefile.inc"
.endif
+.if ${LIBC_ARCH} == "i386" || ${LIBC_ARCH} == "amd64"
+.include "${LIBC_SRCTOP}/x86/sys/Makefile.inc"
+.endif
.if ${MK_NIS} != "no"
CFLAGS+= -DYP
.include "${LIBC_SRCTOP}/yp/Makefile.inc"
.endif
.include "${LIBC_SRCTOP}/capability/Makefile.inc"
.if ${MK_HESIOD} != "no"
CFLAGS+= -DHESIOD
.endif
.if ${MK_FP_LIBC} == "no"
CFLAGS+= -DNO_FLOATING_POINT
.endif
.if ${MK_NS_CACHING} != "no"
CFLAGS+= -DNS_CACHING
.endif
.if defined(_FREEFALL_CONFIG)
CFLAGS+=-D_FREEFALL_CONFIG
.endif

STATICOBJS+=${LIBC_NONSHARED_SRCS:S/.c$/.o/}

VERSION_DEF=${LIBC_SRCTOP}/Versions.def
SYMBOL_MAPS=${SYM_MAPS}
CFLAGS+= -DSYMBOL_VERSIONING

# If there are no machine dependent sources, append all the
# machine-independent sources:
.if empty(MDSRCS)
SRCS+=	${MISRCS}
.else
# Append machine-dependent sources, then append machine-independent sources
# for which there is no machine-dependent variant.
SRCS+=	${MDSRCS}
.for _src in ${MISRCS}
.if ${MDSRCS:R:M${_src:R}} == ""
SRCS+=	${_src}
.endif
.endfor
.endif

KQSRCS=	adddi3.c anddi3.c ashldi3.c ashrdi3.c cmpdi2.c divdi3.c iordi3.c \
	lshldi3.c lshrdi3.c moddi3.c muldi3.c negdi2.c notdi2.c qdivrem.c \
	subdi3.c ucmpdi2.c udivdi3.c umoddi3.c xordi3.c
KSRCS=	bcmp.c ffs.c ffsl.c fls.c flsl.c mcount.c strcat.c strchr.c \
	strcmp.c strcpy.c strlen.c strncpy.c strrchr.c

libkern: libkern.gen libkern.${LIBC_ARCH}

libkern.gen: ${KQSRCS} ${KSRCS}
	${CP} ${LIBC_SRCTOP}/quad/quad.h ${.ALLSRC} ${DESTDIR}/sys/libkern

libkern.${LIBC_ARCH}:: ${KMSRCS}
.if defined(KMSRCS) && !empty(KMSRCS)
	${CP} ${.ALLSRC} ${DESTDIR}/sys/libkern/${LIBC_ARCH}
.endif

.if ${MK_TESTS} != "no"
SUBDIR+=	tests
.endif

.include <bsd.lib.mk>

.if !defined(_SKIP_BUILD)
# We need libutil.h, get it directly to avoid
# recording a build dependency
CFLAGS+= -I${SRCTOP}/lib/libutil
# Same issue with libm
MSUN_ARCH_SUBDIR != ${MAKE} -B -C ${SRCTOP}/lib/msun -V ARCH_SUBDIR
# unfortunately msun/src contains both private and public headers
CFLAGS+= -I${SRCTOP}/lib/msun/${MSUN_ARCH_SUBDIR}
.if ${MACHINE_CPUARCH} == "i386" || ${MACHINE_CPUARCH} == "amd64"
CFLAGS+= -I${SRCTOP}/lib/msun/x86
.endif
CFLAGS+= -I${SRCTOP}/lib/msun/src
# and we do not want to record a dependency on msun
.if ${.MAKE.LEVEL} > 0
GENDIRDEPS_FILTER+= N${RELDIR:H}/msun
.endif
.endif

# Disable warnings in contributed sources.
CWARNFLAGS:=	${.IMPSRC:Ngdtoa_*.c:C/^.+$/${CWARNFLAGS}/:C/^$/-w/}
# XXX For now, we don't allow libc to be compiled with
# -fstack-protector-all because it breaks rtld.  We may want to make a librtld
# in the future to circumvent this.
SSP_CFLAGS:=	${SSP_CFLAGS:S/^-fstack-protector-all$/-fstack-protector/}
# Disable stack protection for SSP symbols.
SSP_CFLAGS:=	${.IMPSRC:N*/stack_protector.c:C/^.+$/${SSP_CFLAGS}/}
# Generate stack unwinding tables for cancellation points
CANCELPOINTS_CFLAGS:=	${.IMPSRC:Mcancelpoints_*:C/^.+$/${CANCELPOINTS_CFLAGS}/:C/^$//}
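The MD-over-MI selection above compares source basenames with the extension
stripped (the :R modifier). A minimal illustration with made-up file names,
not part of this commit:

	# MISRCS= ffs.c memcpy.c     (machine-independent fallbacks)
	# MDSRCS= memcpy.S           (machine-dependent override)
	#
	# ${MDSRCS:R:M${_src:R}} matches "memcpy" against "memcpy", so the
	# MI memcpy.c is skipped and only ffs.c falls through:
	# SRCS ends up as "memcpy.S ffs.c".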
Index: head/lib/libc/aarch64/sys/__vdso_gettc.c
===================================================================
--- head/lib/libc/aarch64/sys/__vdso_gettc.c	(revision 304284)
+++ head/lib/libc/aarch64/sys/__vdso_gettc.c	(revision 304285)
@@ -1,74 +1,76 @@
/*-
 * Copyright (c) 2015 The FreeBSD Foundation
 *
 * This software was developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * [standard 2-clause BSD license text]
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include
#include
#include
#include
#include
+#include <errno.h>
#include "libc_private.h"

static inline uint64_t
cp15_cntvct_get(void)
{
	uint64_t reg;

	__asm __volatile("mrs %0, cntvct_el0" : "=r" (reg));
	return (reg);
}

static inline uint64_t
cp15_cntpct_get(void)
{
	uint64_t reg;

	__asm __volatile("mrs %0, cntpct_el0" : "=r" (reg));
	return (reg);
}

#pragma weak __vdso_gettc
-u_int
-__vdso_gettc(const struct vdso_timehands *th)
+int
+__vdso_gettc(const struct vdso_timehands *th, u_int *tc)
{
-	uint64_t val;

+	if (th->th_algo != VDSO_TH_ALGO_ARM_GENTIM)
+		return (ENOSYS);
	__asm __volatile("isb" : : : "memory");
-	val = th->th_physical == 0 ? cp15_cntvct_get() : cp15_cntpct_get();
-	return (val);
+	*tc = th->th_physical == 0 ? cp15_cntvct_get() : cp15_cntpct_get();
+	return (0);
}

#pragma weak __vdso_gettimekeep
int
__vdso_gettimekeep(struct vdso_timekeep **tk)
{

	return (_elf_aux_info(AT_TIMEKEEP, tk, sizeof(*tk)));
}
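The signature change above turns __vdso_gettc() from a raw counter read into
an error-reporting call, so a mismatched algorithm tag can make the caller
fall back to the real syscall instead of consuming a bogus value. A sketch of
a caller under the new contract, mirroring the tc_delta() rework further down
in this same commit:

	static int
	example_tc_delta(const struct vdso_timehands *th, u_int *delta)
	{
		u_int tc;
		int error;

		error = __vdso_gettc(th, &tc);	/* may now fail cleanly */
		if (error != 0)
			return (error);		/* caller takes syscall path */
		/* Unsigned subtraction handles counter wrap-around. */
		*delta = (tc - th->th_offset_count) & th->th_counter_mask;
		return (0);
	}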
Index: head/lib/libc/amd64/sys/__vdso_gettc.c
===================================================================
--- head/lib/libc/amd64/sys/__vdso_gettc.c	(revision 304284)
+++ head/lib/libc/amd64/sys/__vdso_gettc.c	(nonexistent)
@@ -1,70 +0,0 @@
-/*-
- * Copyright (c) 2012 Konstantin Belousov
- *
- * [standard 2-clause BSD license text]
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include
-#include
-#include
-#include
-#include
-#include "libc_private.h"
-
-static u_int
-__vdso_gettc_low(const struct vdso_timehands *th)
-{
-	u_int rv;
-
-	__asm __volatile("lfence; rdtsc; shrd %%cl, %%edx, %0"
-	    : "=a" (rv) : "c" (th->th_x86_shift) : "edx");
-	return (rv);
-}
-
-static u_int
-__vdso_rdtsc32(void)
-{
-	u_int rv;
-
-	__asm __volatile("lfence;rdtsc" : "=a" (rv) : : "edx");
-	return (rv);
-}
-
-#pragma weak __vdso_gettc
-u_int
-__vdso_gettc(const struct vdso_timehands *th)
-{
-
-	return (th->th_x86_shift > 0 ? __vdso_gettc_low(th) :
-	    __vdso_rdtsc32());
-}
-
-#pragma weak __vdso_gettimekeep
-int
-__vdso_gettimekeep(struct vdso_timekeep **tk)
-{
-
-	return (_elf_aux_info(AT_TIMEKEEP, tk, sizeof(*tk)));
-}

Property changes on: head/lib/libc/amd64/sys/__vdso_gettc.c
___________________________________________________________________
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property

Index: head/lib/libc/amd64/sys/Makefile.inc
===================================================================
--- head/lib/libc/amd64/sys/Makefile.inc	(revision 304284)
+++ head/lib/libc/amd64/sys/Makefile.inc	(revision 304285)
@@ -1,13 +1,13 @@
# from: Makefile.inc,v 1.1 1993/09/03 19:04:23 jtc Exp
# $FreeBSD$

SRCS+=	amd64_get_fsbase.c amd64_get_gsbase.c amd64_set_fsbase.c \
-	amd64_set_gsbase.c __vdso_gettc.c
+	amd64_set_gsbase.c

MDASM=	vfork.S brk.S cerror.S exect.S getcontext.S ptrace.S \
	sbrk.S setlogin.S sigreturn.S

# Don't generate default code for these syscalls:
NOASM=	break.o exit.o getlogin.o openbsd_poll.o sstk.o vfork.o yield.o

PSEUDO=	_getlogin.o _exit.o

Index: head/lib/libc/arm/sys/__vdso_gettc.c
===================================================================
--- head/lib/libc/arm/sys/__vdso_gettc.c	(revision 304284)
+++ head/lib/libc/arm/sys/__vdso_gettc.c	(revision 304285)
@@ -1,86 +1,89 @@
/*-
 * Copyright (c) 2015 The FreeBSD Foundation
 *
 * This software was developed by Konstantin Belousov
 * under sponsorship from the FreeBSD Foundation.
 *
 * [standard 2-clause BSD license text]
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include
#include
#include
#include
#include
#include
+#include <errno.h>
#include "libc_private.h"

#if __ARM_ARCH >= 6
static inline uint64_t
cp15_cntvct_get(void)
{
	uint64_t reg;

	__asm __volatile("mrrc\tp15, 1, %Q0, %R0, c14" : "=r" (reg));
	return (reg);
}

static inline uint64_t
cp15_cntpct_get(void)
{
	uint64_t reg;

	__asm __volatile("mrrc\tp15, 0, %Q0, %R0, c14" : "=r" (reg));
	return (reg);
}
#endif

#pragma weak __vdso_gettc
-u_int
-__vdso_gettc(const struct vdso_timehands *th)
+int
+__vdso_gettc(const struct vdso_timehands *th, u_int *tc)
{
-	uint64_t val;

+	if (th->th_algo != VDSO_TH_ALGO_ARM_GENTIM)
+		return (ENOSYS);
#if __ARM_ARCH >= 6
	/*
	 * Userspace gettimeofday() is only enabled on ARMv7 CPUs, but
	 * libc is compiled for ARMv6.  Due to clang issues, .arch
	 * armv7-a directive does not work.
	 */
	__asm __volatile(".word\t0xf57ff06f" : : : "memory"); /* isb */
-	val = th->th_physical == 0 ? cp15_cntvct_get() : cp15_cntpct_get();
+	*tc = th->th_physical == 0 ? cp15_cntvct_get() : cp15_cntpct_get();
+	return (0);
#else
-	val = 0;
+	*tc = 0;
+	return (ENOSYS);
#endif
-	return (val);
}

#pragma weak __vdso_gettimekeep
int
__vdso_gettimekeep(struct vdso_timekeep **tk)
{

	return (_elf_aux_info(AT_TIMEKEEP, tk, sizeof(*tk)));
}

Index: head/lib/libc/i386/sys/__vdso_gettc.c
===================================================================
--- head/lib/libc/i386/sys/__vdso_gettc.c	(revision 304284)
+++ head/lib/libc/i386/sys/__vdso_gettc.c	(nonexistent)
@@ -1,114 +0,0 @@
-/*-
- * Copyright (c) 2012 Konstantin Belousov
- *
- * [standard 2-clause BSD license text]
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include "libc_private.h"
-
-static int lfence_works = -1;
-
-static int
-get_lfence_usage(void)
-{
-	u_int cpuid_supported, p[4];
-
-	if (lfence_works == -1) {
-		__asm __volatile(
-		    "	pushfl\n"
-		    "	popl	%%eax\n"
-		    "	movl	%%eax,%%ecx\n"
-		    "	xorl	$0x200000,%%eax\n"
-		    "	pushl	%%eax\n"
-		    "	popfl\n"
-		    "	pushfl\n"
-		    "	popl	%%eax\n"
-		    "	xorl	%%eax,%%ecx\n"
-		    "	je	1f\n"
-		    "	movl	$1,%0\n"
-		    "	jmp	2f\n"
-		    "1:	movl	$0,%0\n"
-		    "2:\n"
-		    : "=r" (cpuid_supported) : : "eax", "ecx");
-		if (cpuid_supported) {
-			__asm __volatile(
-			    "	pushl	%%ebx\n"
-			    "	cpuid\n"
-			    "	movl	%%ebx,%1\n"
-			    "	popl	%%ebx\n"
-			    : "=a" (p[0]), "=r" (p[1]), "=c" (p[2]), "=d" (p[3])
-			    :  "0" (0x1));
-			lfence_works = (p[3] & CPUID_SSE2) != 0;
-		} else
-			lfence_works = 0;
-	}
-	return (lfence_works);
-}
-
-static u_int
-__vdso_gettc_low(const struct vdso_timehands *th)
-{
-	u_int rv;
-
-	if (get_lfence_usage() == 1)
-		lfence();
-	__asm __volatile("rdtsc; shrd %%cl, %%edx, %0"
-	    : "=a" (rv) : "c" (th->th_x86_shift) : "edx");
-	return (rv);
-}
-
-static u_int
-__vdso_rdtsc32(void)
-{
-	u_int rv;
-
-	if (get_lfence_usage() == 1)
-		lfence();
-	rv = rdtsc32();
-	return (rv);
-}
-
-#pragma weak __vdso_gettc
-u_int
-__vdso_gettc(const struct vdso_timehands *th)
-{
-
-	return (th->th_x86_shift > 0 ? __vdso_gettc_low(th) :
-	    __vdso_rdtsc32());
-}
-
-#pragma weak __vdso_gettimekeep
-int
-__vdso_gettimekeep(struct vdso_timekeep **tk)
-{
-
-	return (_elf_aux_info(AT_TIMEKEEP, tk, sizeof(*tk)));
-}

Property changes on: head/lib/libc/i386/sys/__vdso_gettc.c
___________________________________________________________________
Deleted: svn:keywords
## -1 +0,0 ##
-FreeBSD=%H
\ No newline at end of property

Index: head/lib/libc/i386/sys/Makefile.inc
===================================================================
--- head/lib/libc/i386/sys/Makefile.inc	(revision 304284)
+++ head/lib/libc/i386/sys/Makefile.inc	(revision 304285)
@@ -1,24 +1,23 @@
# from: Makefile.inc,v 1.1 1993/09/03 19:04:23 jtc Exp
# $FreeBSD$

.if !defined(COMPAT_32BIT)
SRCS+=	i386_clr_watch.c i386_set_watch.c i386_vm86.c
.endif
SRCS+=	i386_get_fsbase.c i386_get_gsbase.c i386_get_ioperm.c i386_get_ldt.c \
-	i386_set_fsbase.c i386_set_gsbase.c i386_set_ioperm.c i386_set_ldt.c \
-	__vdso_gettc.c
+	i386_set_fsbase.c i386_set_gsbase.c i386_set_ioperm.c i386_set_ldt.c

MDASM=	Ovfork.S brk.S cerror.S exect.S getcontext.S ptrace.S \
	sbrk.S setlogin.S sigreturn.S syscall.S

# Don't generate default code for these syscalls:
NOASM=	break.o exit.o getlogin.o openbsd_poll.o sstk.o vfork.o yield.o

PSEUDO=	_getlogin.o _exit.o

MAN+=	i386_get_ioperm.2 i386_get_ldt.2 i386_vm86.2
MAN+=	i386_set_watch.3

MLINKS+=i386_get_ioperm.2 i386_set_ioperm.2
MLINKS+=i386_get_ldt.2 i386_set_ldt.2
MLINKS+=i386_set_watch.3 i386_clr_watch.3
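The deleted i386 code above (and its replacement in x86/sys/__vdso_gettc.c
below) hand-rolls CPUID detection by toggling EFLAGS.ID, because libc must
still run on pre-CPUID 80386/80486 parts. Where the compiler's <cpuid.h>
helper is available, the same "SSE2 implies a working lfence" probe looks
like this (a standalone sketch, not the libc code):

	#include <cpuid.h>		/* GCC/clang x86 helper */
	#include <stdio.h>

	#define CPUID_SSE2	0x04000000	/* CPUID.1:EDX bit 26 */

	int
	main(void)
	{
		unsigned int a, b, c, d;

		/* __get_cpuid() returns 0 when leaf 1 is unsupported. */
		if (__get_cpuid(1, &a, &b, &c, &d) && (d & CPUID_SSE2) != 0)
			printf("lfence is usable\n");
		return (0);
	}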
Index: head/lib/libc/sys/__vdso_gettimeofday.c
===================================================================
--- head/lib/libc/sys/__vdso_gettimeofday.c	(revision 304284)
+++ head/lib/libc/sys/__vdso_gettimeofday.c	(revision 304285)
@@ -1,147 +1,156 @@
/*-
 * Copyright (c) 2012 Konstantin Belousov
 *
 * [standard 2-clause BSD license text]
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include
#include
#include
#include
#include
#include
#include "libc_private.h"

-static u_int
-tc_delta(const struct vdso_timehands *th)
+static int
+tc_delta(const struct vdso_timehands *th, u_int *delta)
{
+	int error;
+	u_int tc;

-	return ((__vdso_gettc(th) - th->th_offset_count) &
-	    th->th_counter_mask);
+	error = __vdso_gettc(th, &tc);
+	if (error == 0)
+		*delta = (tc - th->th_offset_count) & th->th_counter_mask;
+	return (error);
}

/*
 * Calculate the absolute or boot-relative time from the
 * machine-specific fast timecounter and the published timehands
 * structure read from the shared page.
 *
 * The lockless reading scheme is similar to the one used to read the
 * in-kernel timehands, see sys/kern/kern_tc.c:binuptime().  This code
 * is based on the kernel implementation.
 */
static int
binuptime(struct bintime *bt, struct vdso_timekeep *tk, int abs)
{
	struct vdso_timehands *th;
	uint32_t curr, gen;
+	u_int delta;
+	int error;

	do {
		if (!tk->tk_enabled)
			return (ENOSYS);

		curr = atomic_load_acq_32(&tk->tk_current);
		th = &tk->tk_th[curr];
-		if (th->th_algo != VDSO_TH_ALGO_1)
-			return (ENOSYS);
		gen = atomic_load_acq_32(&th->th_gen);
		*bt = th->th_offset;
-		bintime_addx(bt, th->th_scale * tc_delta(th));
+		error = tc_delta(th, &delta);
+		if (error == EAGAIN)
+			continue;
+		if (error != 0)
+			return (error);
+		bintime_addx(bt, th->th_scale * delta);
		if (abs)
			bintime_add(bt, &th->th_boottime);

		/*
		 * Ensure that the load of th_offset is completed
		 * before the load of th_gen.
		 */
		atomic_thread_fence_acq();
	} while (curr != tk->tk_current || gen == 0 || gen != th->th_gen);
	return (0);
}

static struct vdso_timekeep *tk;

#pragma weak __vdso_gettimeofday
int
__vdso_gettimeofday(struct timeval *tv, struct timezone *tz)
{
	struct bintime bt;
	int error;

	if (tz != NULL)
		return (ENOSYS);
	if (tk == NULL) {
		error = __vdso_gettimekeep(&tk);
		if (error != 0 || tk == NULL)
			return (ENOSYS);
	}
	if (tk->tk_ver != VDSO_TK_VER_CURR)
		return (ENOSYS);
	error = binuptime(&bt, tk, 1);
	if (error != 0)
		return (error);
	bintime2timeval(&bt, tv);
	return (0);
}

#pragma weak __vdso_clock_gettime
int
__vdso_clock_gettime(clockid_t clock_id, struct timespec *ts)
{
	struct bintime bt;
	int abs, error;

	if (tk == NULL) {
		error = _elf_aux_info(AT_TIMEKEEP, &tk, sizeof(tk));
		if (error != 0 || tk == NULL)
			return (ENOSYS);
	}
	if (tk->tk_ver != VDSO_TK_VER_CURR)
		return (ENOSYS);
	switch (clock_id) {
	case CLOCK_REALTIME:
	case CLOCK_REALTIME_PRECISE:
	case CLOCK_REALTIME_FAST:
	case CLOCK_SECOND:
		abs = 1;
		break;
	case CLOCK_MONOTONIC:
	case CLOCK_MONOTONIC_PRECISE:
	case CLOCK_MONOTONIC_FAST:
	case CLOCK_UPTIME:
	case CLOCK_UPTIME_PRECISE:
	case CLOCK_UPTIME_FAST:
		abs = 0;
		break;
	default:
		return (ENOSYS);
	}
	error = binuptime(&bt, tk, abs);
	if (error != 0)
		return (error);
	bintime2timespec(&bt, ts);
	if (clock_id == CLOCK_SECOND)
		ts->tv_nsec = 0;
	return (0);
}
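The retry loop in binuptime() above is a generation-count ("seqlock") reader:
the kernel writer zeroes th_gen while it updates the timehands and publishes a
new nonzero value afterwards, so a reader accepts its snapshot only if the
generation was nonzero and unchanged across the read. A self-contained C11
rendering of the pattern (hypothetical types; the kernel/libc version uses
FreeBSD's atomic_load_acq_32() instead):

	#include <stdatomic.h>
	#include <stdint.h>

	struct snap { uint64_t offset; };	/* the protected payload */

	struct pub {
		_Atomic uint32_t gen;	/* 0 while the writer is mid-update */
		struct snap data;
	};

	/* Return a consistent copy of p->data, retrying across updates. */
	static struct snap
	seq_read(const struct pub *p)
	{
		struct snap s;
		uint32_t gen;

		do {
			gen = atomic_load_explicit(&p->gen,
			    memory_order_acquire);
			s = p->data;		/* copy out the payload */
			/* Order the data loads before the gen re-check. */
			atomic_thread_fence(memory_order_acquire);
		} while (gen == 0 || gen != atomic_load_explicit(&p->gen,
		    memory_order_relaxed));
		return (s);
	}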
Index: head/lib/libc/sys/trivial-vdso_tc.c
===================================================================
--- head/lib/libc/sys/trivial-vdso_tc.c	(revision 304284)
+++ head/lib/libc/sys/trivial-vdso_tc.c	(revision 304285)
@@ -1,48 +1,48 @@
/*-
 * Copyright (c) 2013 Konstantin Belousov
 *
 * [standard 2-clause BSD license text]
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include
#include
#include
#include

#pragma weak __vdso_gettc
-u_int
-__vdso_gettc(const struct vdso_timehands *th)
+int
+__vdso_gettc(const struct vdso_timehands *th, u_int *tc)
{

-	return (0);
+	return (ENOSYS);
}

#pragma weak __vdso_gettimekeep
int
__vdso_gettimekeep(struct vdso_timekeep **tk)
{

	return (ENOSYS);
}

Index: head/lib/libc/x86/sys/Makefile.inc
===================================================================
--- head/lib/libc/x86/sys/Makefile.inc	(nonexistent)
+++ head/lib/libc/x86/sys/Makefile.inc	(revision 304285)
@@ -0,0 +1,6 @@
+# $FreeBSD$
+
+.PATH:	${LIBC_SRCTOP}/x86/sys
+
+SRCS+= \
+	__vdso_gettc.c

Property changes on: head/lib/libc/x86/sys/Makefile.inc
___________________________________________________________________
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
Added: svn:mime-type
## -0,0 +1 ##
+text/plain
\ No newline at end of property

Index: head/lib/libc/x86/sys/__vdso_gettc.c
===================================================================
--- head/lib/libc/x86/sys/__vdso_gettc.c	(nonexistent)
+++ head/lib/libc/x86/sys/__vdso_gettc.c	(revision 304285)
@@ -0,0 +1,179 @@
+/*-
+ * Copyright (c) 2012 Konstantin Belousov
+ * Copyright (c) 2016 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Portions of this software were developed by Konstantin Belousov
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * [standard 2-clause BSD license text]
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include
+#include "namespace.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "un-namespace.h"
+#include
+#include
+#include
+#include "libc_private.h"
+
+static void
+lfence_mb(void)
+{
+#if defined(__i386__)
+	static int lfence_works = -1;
+	u_int cpuid_supported, p[4];
+
+	if (lfence_works == -1) {
+		__asm __volatile(
+		    "	pushfl\n"
+		    "	popl	%%eax\n"
+		    "	movl	%%eax,%%ecx\n"
+		    "	xorl	$0x200000,%%eax\n"
+		    "	pushl	%%eax\n"
+		    "	popfl\n"
+		    "	pushfl\n"
+		    "	popl	%%eax\n"
+		    "	xorl	%%eax,%%ecx\n"
+		    "	je	1f\n"
+		    "	movl	$1,%0\n"
+		    "	jmp	2f\n"
+		    "1:	movl	$0,%0\n"
+		    "2:\n"
+		    : "=r" (cpuid_supported) : : "eax", "ecx", "cc");
+		if (cpuid_supported) {
+			__asm __volatile(
+			    "	pushl	%%ebx\n"
+			    "	cpuid\n"
+			    "	movl	%%ebx,%1\n"
+			    "	popl	%%ebx\n"
+			    : "=a" (p[0]), "=r" (p[1]), "=c" (p[2]), "=d" (p[3])
+			    :  "0" (0x1));
+			lfence_works = (p[3] & CPUID_SSE2) != 0;
+		} else
+			lfence_works = 0;
+	}
+	if (lfence_works == 1)
+		lfence();
+#elif defined(__amd64__)
+	lfence();
+#else
+#error "arch"
+#endif
+}
+
+static u_int
+__vdso_gettc_rdtsc_low(const struct vdso_timehands *th)
+{
+	u_int rv;
+
+	lfence_mb();
+	__asm __volatile("rdtsc; shrd %%cl, %%edx, %0"
+	    : "=a" (rv) : "c" (th->th_x86_shift) : "edx");
+	return (rv);
+}
+
+static u_int
+__vdso_rdtsc32(void)
+{
+
+	lfence_mb();
+	return (rdtsc32());
+}
+
+static char *hpet_dev_map = NULL;
+static uint32_t hpet_idx = 0xffffffff;
+
+static void
+__vdso_init_hpet(uint32_t u)
+{
+	static const char devprefix[] = "/dev/hpet";
+	char devname[64], *c, *c1, t;
+	int fd;
+
+	c1 = c = stpcpy(devname, devprefix);
+	u = hpet_idx;
+	do {
+		*c++ = u % 10 + '0';
+		u /= 10;
+	} while (u != 0);
+	*c = '\0';
+	for (c--; c1 != c; c1++, c--) {
+		t = *c1;
+		*c1 = *c;
+		*c = t;
+	}
+	fd = _open(devname, O_RDONLY);
+	if (fd == -1) {
+		hpet_dev_map = MAP_FAILED;
+		return;
+	}
+	if (hpet_dev_map != NULL && hpet_dev_map != MAP_FAILED)
+		munmap(hpet_dev_map, PAGE_SIZE);
+	hpet_dev_map = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED, fd, 0);
+	_close(fd);
+}
+
+#pragma weak __vdso_gettc
+int
+__vdso_gettc(const struct vdso_timehands *th, u_int *tc)
+{
+	uint32_t tmp;
+
+	switch (th->th_algo) {
+	case VDSO_TH_ALGO_X86_TSC:
+		*tc = th->th_x86_shift > 0 ? __vdso_gettc_rdtsc_low(th) :
+		    __vdso_rdtsc32();
+		return (0);
+	case VDSO_TH_ALGO_X86_HPET:
+		tmp = th->th_x86_hpet_idx;
+		if (hpet_dev_map == NULL || tmp != hpet_idx) {
+			hpet_idx = tmp;
+			__vdso_init_hpet(hpet_idx);
+		}
+		if (hpet_dev_map == MAP_FAILED)
+			return (ENOSYS);
+		*tc = *(volatile uint32_t *)(hpet_dev_map +
+		    HPET_MAIN_COUNTER);
+		return (0);
+	default:
+		return (ENOSYS);
+	}
+}
+
+#pragma weak __vdso_gettimekeep
+int
+__vdso_gettimekeep(struct vdso_timekeep **tk)
+{
+
+	return (_elf_aux_info(AT_TIMEKEEP, tk, sizeof(*tk)));
+}

Property changes on: head/lib/libc/x86/sys/__vdso_gettc.c
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+FreeBSD=%H
\ No newline at end of property
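The VDSO_TH_ALGO_X86_HPET path above works because FreeBSD exposes the HPET
register page read-only through /dev/hpetN. A standalone sketch of the same
access pattern (device unit and 4 KB page size are assumptions; the 0xf0
main-counter offset comes from the HPET specification):

	#include <sys/mman.h>
	#include <fcntl.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <unistd.h>

	#define HPET_MAIN_COUNTER	0xf0	/* per the HPET spec */

	int
	main(void)
	{
		char *p;
		int fd;

		fd = open("/dev/hpet0", O_RDONLY);	/* assumes unit 0 */
		if (fd == -1)
			return (1);
		p = mmap(NULL, 4096, PROT_READ, MAP_SHARED, fd, 0);
		close(fd);		/* the mapping survives the close */
		if (p == MAP_FAILED)
			return (1);
		printf("main counter: %u\n",
		    *(volatile uint32_t *)(p + HPET_MAIN_COUNTER));
		return (0);
	}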
Index: head/sys/arm/arm/generic_timer.c
===================================================================
--- head/sys/arm/arm/generic_timer.c	(revision 304284)
+++ head/sys/arm/arm/generic_timer.c	(revision 304285)
@@ -1,541 +1,541 @@
/*-
 * Copyright (c) 2011 The FreeBSD Foundation
 * Copyright (c) 2013 Ruslan Bukin
 * All rights reserved.
 *
 * Based on mpcore_timer.c developed by Ben Gray
 *
 * [standard 3-clause BSD license text]
 */

/**
 * Cortex-A7, Cortex-A15, ARMv8 and later Generic Timer
 */

[... unchanged context: includes, the GT_CTRL_*/GT_CNTKCTL_* register
 definitions, struct arm_tmr_softc, and the timer_spec resource table ...]

	{ -1, 0 }
};

+static uint32_t arm_tmr_fill_vdso_timehands(struct vdso_timehands *vdso_th,
+    struct timecounter *tc);
+static void arm_tmr_do_delay(int usec, void *);
+
static timecounter_get_t arm_tmr_get_timecount;

static struct timecounter arm_tmr_timecount = {
	.tc_name           = "ARM MPCore Timecounter",
	.tc_get_timecount  = arm_tmr_get_timecount,
	.tc_poll_pps       = NULL,
	.tc_counter_mask   = ~0u,
	.tc_frequency      = 0,
	.tc_quality        = 1000,
+	.tc_fill_vdso_timehands = arm_tmr_fill_vdso_timehands,
};

[... unchanged context: the get_el0()/set_el0()/get_el1()/set_el1()
 accessor macros for __arm__ (cp15) and __aarch64__ (special regs) ...]

#endif

-static uint32_t arm_tmr_fill_vdso_timehands(struct vdso_timehands *vdso_th,
-    struct timecounter *tc);
-static void arm_tmr_do_delay(int usec, void *);
-
static int
get_freq(void)
{
	return (get_el0(cntfrq));
}

[... unchanged context: counter and control-register accessors,
 setup_user_access()/tmr_setup_user_access(), arm_tmr_get_timecount(),
 eventtimer start/stop/intr, FDT and ACPI probe/identify, and most of
 arm_tmr_attach() up to the interrupt-handler setup loop ...]

	}

-	arm_cpu_fill_vdso_timehands = arm_tmr_fill_vdso_timehands;
-
	arm_tmr_timecount.tc_frequency = sc->clkfreq;
	tc_init(&arm_tmr_timecount);

[... unchanged context: eventtimer registration, FDT/ACPI driver and
 devclass boilerplate, arm_tmr_do_delay(), and the !MULTIDELAY DELAY()
 fallback ...]

static uint32_t
arm_tmr_fill_vdso_timehands(struct vdso_timehands *vdso_th,
    struct timecounter *tc)
{

+	vdso_th->th_algo = VDSO_TH_ALGO_ARM_GENTIM;
	vdso_th->th_physical = arm_tmr_sc->physical;
	bzero(vdso_th->th_res, sizeof(vdso_th->th_res));
-	return (tc == &arm_tmr_timecount);
+	return (1);
}
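tc_fill_vdso_timehands is the per-timecounter hook that publishes
machine-specific details to the shared page, and its nonzero return value is
what enables the userspace fast path. Because the hook now hangs off the
timecounter itself rather than a global arm_cpu_fill_vdso_timehands pointer,
it is only invoked for its own timecounter, which is why the identity check
against &arm_tmr_timecount collapses to return (1). A minimal kernel-side
sketch of the contract (hypothetical driver name; field names and the
algorithm tag are taken from this diff):

	static uint32_t
	mytimer_fill_vdso_timehands(struct vdso_timehands *vdso_th,
	    struct timecounter *tc)
	{
		/* Tag the shared page so libc picks the matching reader. */
		vdso_th->th_algo = VDSO_TH_ALGO_ARM_GENTIM;
		vdso_th->th_physical = 0;	/* read the virtual counter */
		bzero(vdso_th->th_res, sizeof(vdso_th->th_res));
		return (1);		/* nonzero: fast path is usable */
	}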
* * Machine dependent functions for kernel setup * * Created : 17/09/94 * Updated : 18/04/01 updated for new wscons */ #include "opt_compat.h" #include "opt_ddb.h" #include "opt_kstack_pages.h" #include "opt_platform.h" #include "opt_sched.h" #include "opt_timer.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef FDT #include #include #include #endif #ifdef DDB #include #if __ARM_ARCH >= 6 DB_SHOW_COMMAND(cp15, db_show_cp15) { u_int reg; reg = cp15_midr_get(); db_printf("Cpu ID: 0x%08x\n", reg); reg = cp15_ctr_get(); db_printf("Current Cache Lvl ID: 0x%08x\n",reg); reg = cp15_sctlr_get(); db_printf("Ctrl: 0x%08x\n",reg); reg = cp15_actlr_get(); db_printf("Aux Ctrl: 0x%08x\n",reg); reg = cp15_id_pfr0_get(); db_printf("Processor Feat 0: 0x%08x\n", reg); reg = cp15_id_pfr1_get(); db_printf("Processor Feat 1: 0x%08x\n", reg); reg = cp15_id_dfr0_get(); db_printf("Debug Feat 0: 0x%08x\n", reg); reg = cp15_id_afr0_get(); db_printf("Auxiliary Feat 0: 0x%08x\n", reg); reg = cp15_id_mmfr0_get(); db_printf("Memory Model Feat 0: 0x%08x\n", reg); reg = cp15_id_mmfr1_get(); db_printf("Memory Model Feat 1: 0x%08x\n", reg); reg = cp15_id_mmfr2_get(); db_printf("Memory Model Feat 2: 0x%08x\n", reg); reg = cp15_id_mmfr3_get(); db_printf("Memory Model Feat 3: 0x%08x\n", reg); reg = cp15_ttbr_get(); db_printf("TTB0: 0x%08x\n", reg); } DB_SHOW_COMMAND(vtop, db_show_vtop) { u_int reg; if (have_addr) { cp15_ats1cpr_set(addr); reg = cp15_par_get(); db_printf("Physical address reg: 0x%08x\n",reg); } else db_printf("show vtop \n"); } #endif /* __ARM_ARCH >= 6 */ #endif /* DDB */ #ifdef DEBUG #define debugf(fmt, args...) printf(fmt, ##args) #else #define debugf(fmt, args...) #endif #if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \ defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7) || \ defined(COMPAT_FREEBSD9) #error FreeBSD/arm doesn't provide compatibility with releases prior to 10 #endif struct pcpu __pcpu[MAXCPU]; struct pcpu *pcpup = &__pcpu[0]; static struct trapframe proc0_tf; uint32_t cpu_reset_address = 0; int cold = 1; vm_offset_t vector_page; int (*_arm_memcpy)(void *, void *, int, int) = NULL; int (*_arm_bzero)(void *, int, int) = NULL; int _min_memcpy_size = 0; int _min_bzero_size = 0; extern int *end; #ifdef FDT static char *loader_envp; vm_paddr_t pmap_pa; #if __ARM_ARCH >= 6 vm_offset_t systempage; vm_offset_t irqstack; vm_offset_t undstack; vm_offset_t abtstack; #else /* * This is the number of L2 page tables required for covering max * (hypothetical) memsize of 4GB and all kernel mappings (vectors, msgbuf, * stacks etc.), uprounded to be divisible by 4. 
*/ #define KERNEL_PT_MAX 78 static struct pv_addr kernel_pt_table[KERNEL_PT_MAX]; struct pv_addr systempage; static struct pv_addr msgbufpv; struct pv_addr irqstack; struct pv_addr undstack; struct pv_addr abtstack; static struct pv_addr kernelstack; #endif #endif #if defined(LINUX_BOOT_ABI) #define LBABI_MAX_BANKS 10 #define CMDLINE_GUARD "FreeBSD:" uint32_t board_id; struct arm_lbabi_tag *atag_list; char linux_command_line[LBABI_MAX_COMMAND_LINE + 1]; char atags[LBABI_MAX_COMMAND_LINE * 2]; uint32_t memstart[LBABI_MAX_BANKS]; uint32_t memsize[LBABI_MAX_BANKS]; uint32_t membanks; #endif #ifdef MULTIDELAY static delay_func *delay_impl; static void *delay_arg; #endif static uint32_t board_revision; /* hex representation of uint64_t */ static char board_serial[32]; SYSCTL_NODE(_hw, OID_AUTO, board, CTLFLAG_RD, 0, "Board attributes"); SYSCTL_UINT(_hw_board, OID_AUTO, revision, CTLFLAG_RD, &board_revision, 0, "Board revision"); SYSCTL_STRING(_hw_board, OID_AUTO, serial, CTLFLAG_RD, board_serial, 0, "Board serial"); int vfp_exists; SYSCTL_INT(_hw, HW_FLOATINGPT, floatingpoint, CTLFLAG_RD, &vfp_exists, 0, "Floating point support enabled"); void board_set_serial(uint64_t serial) { snprintf(board_serial, sizeof(board_serial)-1, "%016jx", serial); } void board_set_revision(uint32_t revision) { board_revision = revision; } void sendsig(catcher, ksi, mask) sig_t catcher; ksiginfo_t *ksi; sigset_t *mask; { struct thread *td; struct proc *p; struct trapframe *tf; struct sigframe *fp, frame; struct sigacts *psp; struct sysentvec *sysent; int onstack; int sig; int code; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; code = ksi->ksi_code; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); tf = td->td_frame; onstack = sigonstack(tf->tf_usr_sp); CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm, catcher, sig); /* Allocate and validate space for the signal handler context. */ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !(onstack) && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct sigframe *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else fp = (struct sigframe *)td->td_frame->tf_usr_sp; /* make room on the stack */ fp--; /* make the stack aligned */ fp = (struct sigframe *)STACKALIGN(fp); /* Populate the siginfo frame. */ get_mcontext(td, &frame.sf_uc.uc_mcontext, 0); frame.sf_si = ksi->ksi_info; frame.sf_uc.uc_sigmask = *mask; frame.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK ) ? ((onstack) ? SS_ONSTACK : 0) : SS_DISABLE; frame.sf_uc.uc_stack = td->td_sigstk; mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(td->td_proc); /* Copy the sigframe out to the user's stack. */ if (copyout(&frame, fp, sizeof(*fp)) != 0) { /* Process has trashed its stack. Kill it. */ CTR2(KTR_SIG, "sendsig: sigexit td=%p fp=%p", td, fp); PROC_LOCK(p); sigexit(td, SIGILL); } /* * Build context to run handler in. We invoke the handler * directly, only returning via the trampoline. Note the * trampoline version numbers are coordinated with machine- * dependent code in libc. 
*/ tf->tf_r0 = sig; tf->tf_r1 = (register_t)&fp->sf_si; tf->tf_r2 = (register_t)&fp->sf_uc; /* the trampoline uses r5 as the uc address */ tf->tf_r5 = (register_t)&fp->sf_uc; tf->tf_pc = (register_t)catcher; tf->tf_usr_sp = (register_t)fp; sysent = p->p_sysent; if (sysent->sv_sigcode_base != 0) tf->tf_usr_lr = (register_t)sysent->sv_sigcode_base; else tf->tf_usr_lr = (register_t)(sysent->sv_psstrings - *(sysent->sv_szsigcode)); /* Set the mode to enter in the signal handler */ #if __ARM_ARCH >= 7 if ((register_t)catcher & 1) tf->tf_spsr |= PSR_T; else tf->tf_spsr &= ~PSR_T; #endif CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->tf_usr_lr, tf->tf_usr_sp); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } struct kva_md_info kmi; /* * arm32_vector_init: * * Initialize the vector page, and select whether or not to * relocate the vectors. * * NOTE: We expect the vector page to be mapped at its expected * destination. */ extern unsigned int page0[], page0_data[]; void arm_vector_init(vm_offset_t va, int which) { unsigned int *vectors = (int *) va; unsigned int *vectors_data = vectors + (page0_data - page0); int vec; /* * Loop through the vectors we're taking over, and copy the * vector's insn and data word. */ for (vec = 0; vec < ARM_NVEC; vec++) { if ((which & (1 << vec)) == 0) { /* Don't want to take over this vector. */ continue; } vectors[vec] = page0[vec]; vectors_data[vec] = page0_data[vec]; } /* Now sync the vectors. */ icache_sync(va, (ARM_NVEC * 2) * sizeof(u_int)); vector_page = va; if (va == ARM_VECTORS_HIGH) { /* * Enable high vectors in the system control reg (SCTLR). * * Assume the MD caller knows what it's doing here, and really * does want the vector page relocated. * * Note: This has to be done here (and not just in * cpu_setup()) because the vector page needs to be * accessible *before* cpu_startup() is called. * Think ddb(9) ... */ cpu_control(CPU_CONTROL_VECRELOC, CPU_CONTROL_VECRELOC); } } static void cpu_startup(void *dummy) { struct pcb *pcb = thread0.td_pcb; const unsigned int mbyte = 1024 * 1024; #if __ARM_ARCH < 6 && !defined(ARM_CACHE_LOCK_ENABLE) vm_page_t m; #endif identify_arm_cpu(); vm_ksubmap_init(&kmi); /* * Display the RAM layout. */ printf("real memory = %ju (%ju MB)\n", (uintmax_t)arm32_ptob(realmem), (uintmax_t)arm32_ptob(realmem) / mbyte); printf("avail memory = %ju (%ju MB)\n", (uintmax_t)arm32_ptob(vm_cnt.v_free_count), (uintmax_t)arm32_ptob(vm_cnt.v_free_count) / mbyte); if (bootverbose) { arm_physmem_print_tables(); devmap_print_table(); } bufinit(); vm_pager_bufferinit(); pcb->pcb_regs.sf_sp = (u_int)thread0.td_kstack + USPACE_SVC_STACK_TOP; pmap_set_pcb_pagedir(kernel_pmap, pcb); #if __ARM_ARCH < 6 vector_page_setprot(VM_PROT_READ); pmap_postinit(); #ifdef ARM_CACHE_LOCK_ENABLE pmap_kenter_user(ARM_TP_ADDRESS, ARM_TP_ADDRESS); arm_lock_cache_line(ARM_TP_ADDRESS); #else m = vm_page_alloc(NULL, 0, VM_ALLOC_NOOBJ | VM_ALLOC_ZERO); pmap_kenter_user(ARM_TP_ADDRESS, VM_PAGE_TO_PHYS(m)); #endif *(uint32_t *)ARM_RAS_START = 0; *(uint32_t *)ARM_RAS_END = 0xffffffff; #endif } SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); /* * Flush the D-cache for non-DMA I/O so that the I-cache can * be made coherent later. */ void cpu_flush_dcache(void *ptr, size_t len) { dcache_wb_poc((vm_offset_t)ptr, (vm_paddr_t)vtophys(ptr), len); } /* Get current clock frequency for the given cpu id. 
*/ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { return (ENXIO); } void cpu_idle(int busy) { CTR2(KTR_SPARE2, "cpu_idle(%d) at %d", busy, curcpu); spinlock_enter(); #ifndef NO_EVENTTIMERS if (!busy) cpu_idleclock(); #endif if (!sched_runnable()) cpu_sleep(0); #ifndef NO_EVENTTIMERS if (!busy) cpu_activeclock(); #endif spinlock_exit(); CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done", busy, curcpu); } int cpu_idle_wakeup(int cpu) { return (0); } /* * Most ARM platforms don't need to do anything special to init their clocks * (they get intialized during normal device attachment), and by not defining a * cpu_initclocks() function they get this generic one. Any platform that needs * to do something special can just provide their own implementation, which will * override this one due to the weak linkage. */ void arm_generic_initclocks(void) { #ifndef NO_EVENTTIMERS #ifdef SMP if (PCPU_GET(cpuid) == 0) cpu_initclocks_bsp(); else cpu_initclocks_ap(); #else cpu_initclocks_bsp(); #endif #endif } __weak_reference(arm_generic_initclocks, cpu_initclocks); #ifdef MULTIDELAY void arm_set_delay(delay_func *impl, void *arg) { KASSERT(impl != NULL, ("No DELAY implementation")); delay_impl = impl; delay_arg = arg; } void DELAY(int usec) { delay_impl(usec, delay_arg); } #endif int fill_regs(struct thread *td, struct reg *regs) { struct trapframe *tf = td->td_frame; bcopy(&tf->tf_r0, regs->r, sizeof(regs->r)); regs->r_sp = tf->tf_usr_sp; regs->r_lr = tf->tf_usr_lr; regs->r_pc = tf->tf_pc; regs->r_cpsr = tf->tf_spsr; return (0); } int fill_fpregs(struct thread *td, struct fpreg *regs) { bzero(regs, sizeof(*regs)); return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *tf = td->td_frame; bcopy(regs->r, &tf->tf_r0, sizeof(regs->r)); tf->tf_usr_sp = regs->r_sp; tf->tf_usr_lr = regs->r_lr; tf->tf_pc = regs->r_pc; tf->tf_spsr &= ~PSR_FLAGS; tf->tf_spsr |= regs->r_cpsr & PSR_FLAGS; return (0); } int set_fpregs(struct thread *td, struct fpreg *regs) { return (0); } int fill_dbregs(struct thread *td, struct dbreg *regs) { return (0); } int set_dbregs(struct thread *td, struct dbreg *regs) { return (0); } static int ptrace_read_int(struct thread *td, vm_offset_t addr, uint32_t *v) { if (proc_readmem(td, td->td_proc, addr, v, sizeof(*v)) != sizeof(*v)) return (ENOMEM); return (0); } static int ptrace_write_int(struct thread *td, vm_offset_t addr, uint32_t v) { if (proc_writemem(td, td->td_proc, addr, &v, sizeof(v)) != sizeof(v)) return (ENOMEM); return (0); } static u_int ptrace_get_usr_reg(void *cookie, int reg) { int ret; struct thread *td = cookie; KASSERT(((reg >= 0) && (reg <= ARM_REG_NUM_PC)), ("reg is outside range")); switch(reg) { case ARM_REG_NUM_PC: ret = td->td_frame->tf_pc; break; case ARM_REG_NUM_LR: ret = td->td_frame->tf_usr_lr; break; case ARM_REG_NUM_SP: ret = td->td_frame->tf_usr_sp; break; default: ret = *((register_t*)&td->td_frame->tf_r0 + reg); break; } return (ret); } static u_int ptrace_get_usr_int(void* cookie, vm_offset_t offset, u_int* val) { struct thread *td = cookie; u_int error; error = ptrace_read_int(td, offset, val); return (error); } /** * This function parses current instruction opcode and decodes * any possible jump (change in PC) which might occur after * the instruction is executed. * * @param td Thread structure of analysed task * @param cur_instr Currently executed instruction * @param alt_next_address Pointer to the variable where * the destination address of the * jump instruction shall be stored. 
* * @return <0> when jump is possible * otherwise */ static int ptrace_get_alternative_next(struct thread *td, uint32_t cur_instr, uint32_t *alt_next_address) { int error; if (inst_branch(cur_instr) || inst_call(cur_instr) || inst_return(cur_instr)) { error = arm_predict_branch(td, cur_instr, td->td_frame->tf_pc, alt_next_address, ptrace_get_usr_reg, ptrace_get_usr_int); return (error); } return (EINVAL); } int ptrace_single_step(struct thread *td) { struct proc *p; int error, error_alt; uint32_t cur_instr, alt_next = 0; /* TODO: This needs to be updated for Thumb-2 */ if ((td->td_frame->tf_spsr & PSR_T) != 0) return (EINVAL); KASSERT(td->td_md.md_ptrace_instr == 0, ("Didn't clear single step")); KASSERT(td->td_md.md_ptrace_instr_alt == 0, ("Didn't clear alternative single step")); p = td->td_proc; PROC_UNLOCK(p); error = ptrace_read_int(td, td->td_frame->tf_pc, &cur_instr); if (error) goto out; error = ptrace_read_int(td, td->td_frame->tf_pc + INSN_SIZE, &td->td_md.md_ptrace_instr); if (error == 0) { error = ptrace_write_int(td, td->td_frame->tf_pc + INSN_SIZE, PTRACE_BREAKPOINT); if (error) { td->td_md.md_ptrace_instr = 0; } else { td->td_md.md_ptrace_addr = td->td_frame->tf_pc + INSN_SIZE; } } error_alt = ptrace_get_alternative_next(td, cur_instr, &alt_next); if (error_alt == 0) { error_alt = ptrace_read_int(td, alt_next, &td->td_md.md_ptrace_instr_alt); if (error_alt) { td->td_md.md_ptrace_instr_alt = 0; } else { error_alt = ptrace_write_int(td, alt_next, PTRACE_BREAKPOINT); if (error_alt) td->td_md.md_ptrace_instr_alt = 0; else td->td_md.md_ptrace_addr_alt = alt_next; } } out: PROC_LOCK(p); return ((error != 0) && (error_alt != 0)); } int ptrace_clear_single_step(struct thread *td) { struct proc *p; /* TODO: This needs to be updated for Thumb-2 */ if ((td->td_frame->tf_spsr & PSR_T) != 0) return (EINVAL); if (td->td_md.md_ptrace_instr != 0) { p = td->td_proc; PROC_UNLOCK(p); ptrace_write_int(td, td->td_md.md_ptrace_addr, td->td_md.md_ptrace_instr); PROC_LOCK(p); td->td_md.md_ptrace_instr = 0; } if (td->td_md.md_ptrace_instr_alt != 0) { p = td->td_proc; PROC_UNLOCK(p); ptrace_write_int(td, td->td_md.md_ptrace_addr_alt, td->td_md.md_ptrace_instr_alt); PROC_LOCK(p); td->td_md.md_ptrace_instr_alt = 0; } return (0); } int ptrace_set_pc(struct thread *td, unsigned long addr) { td->td_frame->tf_pc = addr; return (0); } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { } void spinlock_enter(void) { struct thread *td; register_t cspr; td = curthread; if (td->td_md.md_spinlock_count == 0) { cspr = disable_interrupts(PSR_I | PSR_F); td->td_md.md_spinlock_count = 1; td->td_md.md_saved_cspr = cspr; } else td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; register_t cspr; td = curthread; critical_exit(); cspr = td->td_md.md_saved_cspr; td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) restore_interrupts(cspr); } /* * Clear registers on exec */ void exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *tf = td->td_frame; memset(tf, 0, sizeof(*tf)); tf->tf_usr_sp = stack; tf->tf_usr_lr = imgp->entry_addr; tf->tf_svc_lr = 0x77777777; tf->tf_pc = imgp->entry_addr; tf->tf_spsr = PSR_USR32_MODE; } /* * Get machine context. 
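[Editor's note: the two-breakpoint scheme above (one at the next instruction, one at a predicted branch target) is what services PT_STEP on armv5. A hedged sketch of the tracer side that exercises it, with minimal error handling; the target command comes from argv.]

#include <sys/types.h>
#include <sys/ptrace.h>
#include <sys/wait.h>
#include <stdio.h>
#include <unistd.h>

int
main(int argc, char **argv)
{
	pid_t pid;
	int i, status;

	if (argc < 2)
		return (1);
	if ((pid = fork()) == 0) {
		ptrace(PT_TRACE_ME, 0, NULL, 0);  /* child: request tracing */
		execvp(argv[1], argv + 1);
		_exit(1);
	}
	waitpid(pid, &status, 0);		/* stopped at exec */
	for (i = 0; i < 5 && WIFSTOPPED(status); i++) {
		/* Each PT_STEP lands in the kernel's ptrace_single_step(). */
		if (ptrace(PT_STEP, pid, (caddr_t)1, 0) == -1) {
			perror("PT_STEP");
			break;
		}
		waitpid(pid, &status, 0);
		printf("step %d: stop signal %d\n", i, WSTOPSIG(status));
	}
	ptrace(PT_KILL, pid, NULL, 0);
	return (0);
}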
*/ int get_mcontext(struct thread *td, mcontext_t *mcp, int clear_ret) { struct trapframe *tf = td->td_frame; __greg_t *gr = mcp->__gregs; if (clear_ret & GET_MC_CLEAR_RET) { gr[_REG_R0] = 0; gr[_REG_CPSR] = tf->tf_spsr & ~PSR_C; } else { gr[_REG_R0] = tf->tf_r0; gr[_REG_CPSR] = tf->tf_spsr; } gr[_REG_R1] = tf->tf_r1; gr[_REG_R2] = tf->tf_r2; gr[_REG_R3] = tf->tf_r3; gr[_REG_R4] = tf->tf_r4; gr[_REG_R5] = tf->tf_r5; gr[_REG_R6] = tf->tf_r6; gr[_REG_R7] = tf->tf_r7; gr[_REG_R8] = tf->tf_r8; gr[_REG_R9] = tf->tf_r9; gr[_REG_R10] = tf->tf_r10; gr[_REG_R11] = tf->tf_r11; gr[_REG_R12] = tf->tf_r12; gr[_REG_SP] = tf->tf_usr_sp; gr[_REG_LR] = tf->tf_usr_lr; gr[_REG_PC] = tf->tf_pc; return (0); } /* * Set machine context. * * However, we don't set any but the user modifiable flags, and we won't * touch the cs selector. */ int set_mcontext(struct thread *td, mcontext_t *mcp) { struct trapframe *tf = td->td_frame; const __greg_t *gr = mcp->__gregs; tf->tf_r0 = gr[_REG_R0]; tf->tf_r1 = gr[_REG_R1]; tf->tf_r2 = gr[_REG_R2]; tf->tf_r3 = gr[_REG_R3]; tf->tf_r4 = gr[_REG_R4]; tf->tf_r5 = gr[_REG_R5]; tf->tf_r6 = gr[_REG_R6]; tf->tf_r7 = gr[_REG_R7]; tf->tf_r8 = gr[_REG_R8]; tf->tf_r9 = gr[_REG_R9]; tf->tf_r10 = gr[_REG_R10]; tf->tf_r11 = gr[_REG_R11]; tf->tf_r12 = gr[_REG_R12]; tf->tf_usr_sp = gr[_REG_SP]; tf->tf_usr_lr = gr[_REG_LR]; tf->tf_pc = gr[_REG_PC]; tf->tf_spsr = gr[_REG_CPSR]; return (0); } /* * MPSAFE */ int sys_sigreturn(td, uap) struct thread *td; struct sigreturn_args /* { const struct __ucontext *sigcntxp; } */ *uap; { ucontext_t uc; int spsr; if (uap == NULL) return (EFAULT); if (copyin(uap->sigcntxp, &uc, sizeof(uc))) return (EFAULT); /* * Make sure the processor mode has not been tampered with and * interrupts have not been disabled. */ spsr = uc.uc_mcontext.__gregs[_REG_CPSR]; if ((spsr & PSR_MODE) != PSR_USR32_MODE || (spsr & (PSR_I | PSR_F)) != 0) return (EINVAL); /* Restore register context. */ set_mcontext(td, &uc.uc_mcontext); /* Restore signal mask. */ kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); return (EJUSTRETURN); } /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. 
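[Editor's note: sys_sigreturn() above refuses any user-supplied CPSR that selects a privileged mode or masks IRQ/FIQ, since the saved context is attacker-controlled memory. The same acceptance test, restated standalone; the constants mirror the architectural CPSR layout.]

#include <stdint.h>
#include <stdio.h>

/* Field values restated from the ARM CPSR layout so the sketch is
 * self-contained. */
#define PSR_MODE	0x0000001f
#define PSR_USR32_MODE	0x00000010
#define PSR_I		0x00000080
#define PSR_F		0x00000040

/* Same acceptance test sys_sigreturn() applies to a user-supplied CPSR. */
static int
cpsr_is_safe(uint32_t spsr)
{
	if ((spsr & PSR_MODE) != PSR_USR32_MODE)
		return (0);		/* forged privileged mode */
	if ((spsr & (PSR_I | PSR_F)) != 0)
		return (0);		/* IRQ/FIQ would stay masked */
	return (1);
}

int
main(void)
{
	printf("user mode, irqs on:  %d\n", cpsr_is_safe(0x00000010));
	printf("svc mode attempt:    %d\n", cpsr_is_safe(0x00000013));
	printf("irqs masked attempt: %d\n", cpsr_is_safe(0x00000090));
	return (0);
}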
*/ void makectx(struct trapframe *tf, struct pcb *pcb) { pcb->pcb_regs.sf_r4 = tf->tf_r4; pcb->pcb_regs.sf_r5 = tf->tf_r5; pcb->pcb_regs.sf_r6 = tf->tf_r6; pcb->pcb_regs.sf_r7 = tf->tf_r7; pcb->pcb_regs.sf_r8 = tf->tf_r8; pcb->pcb_regs.sf_r9 = tf->tf_r9; pcb->pcb_regs.sf_r10 = tf->tf_r10; pcb->pcb_regs.sf_r11 = tf->tf_r11; pcb->pcb_regs.sf_r12 = tf->tf_r12; pcb->pcb_regs.sf_pc = tf->tf_pc; pcb->pcb_regs.sf_lr = tf->tf_usr_lr; pcb->pcb_regs.sf_sp = tf->tf_usr_sp; } /* * Fake up a boot descriptor table */ vm_offset_t fake_preload_metadata(struct arm_boot_params *abp __unused, void *dtb_ptr, size_t dtb_size) { #ifdef DDB vm_offset_t zstart = 0, zend = 0; #endif vm_offset_t lastaddr; int i = 0; static uint32_t fake_preload[35]; fake_preload[i++] = MODINFO_NAME; fake_preload[i++] = strlen("kernel") + 1; strcpy((char*)&fake_preload[i++], "kernel"); i += 1; fake_preload[i++] = MODINFO_TYPE; fake_preload[i++] = strlen("elf kernel") + 1; strcpy((char*)&fake_preload[i++], "elf kernel"); i += 2; fake_preload[i++] = MODINFO_ADDR; fake_preload[i++] = sizeof(vm_offset_t); fake_preload[i++] = KERNVIRTADDR; fake_preload[i++] = MODINFO_SIZE; fake_preload[i++] = sizeof(uint32_t); fake_preload[i++] = (uint32_t)&end - KERNVIRTADDR; #ifdef DDB if (*(uint32_t *)KERNVIRTADDR == MAGIC_TRAMP_NUMBER) { fake_preload[i++] = MODINFO_METADATA|MODINFOMD_SSYM; fake_preload[i++] = sizeof(vm_offset_t); fake_preload[i++] = *(uint32_t *)(KERNVIRTADDR + 4); fake_preload[i++] = MODINFO_METADATA|MODINFOMD_ESYM; fake_preload[i++] = sizeof(vm_offset_t); fake_preload[i++] = *(uint32_t *)(KERNVIRTADDR + 8); lastaddr = *(uint32_t *)(KERNVIRTADDR + 8); zend = lastaddr; zstart = *(uint32_t *)(KERNVIRTADDR + 4); db_fetch_ksymtab(zstart, zend); } else #endif lastaddr = (vm_offset_t)&end; if (dtb_ptr != NULL) { /* Copy DTB to KVA space and insert it into module chain. */ lastaddr = roundup(lastaddr, sizeof(int)); fake_preload[i++] = MODINFO_METADATA | MODINFOMD_DTBP; fake_preload[i++] = sizeof(uint32_t); fake_preload[i++] = (uint32_t)lastaddr; memmove((void *)lastaddr, dtb_ptr, dtb_size); lastaddr += dtb_size; lastaddr = roundup(lastaddr, sizeof(int)); } fake_preload[i++] = 0; fake_preload[i] = 0; preload_metadata = (void *)fake_preload; init_static_kenv(NULL, 0); return (lastaddr); } void pcpu0_init(void) { #if __ARM_ARCH >= 6 set_curthread(&thread0); #endif pcpu_init(pcpup, 0, sizeof(struct pcpu)); PCPU_SET(curthread, &thread0); } #if defined(LINUX_BOOT_ABI) /* Convert the U-Boot command line into FreeBSD kenv and boot options. */ static void cmdline_set_env(char *cmdline, const char *guard) { char *cmdline_next, *env; size_t size, guard_len; int i; size = strlen(cmdline); /* Skip leading spaces. */ for (; isspace(*cmdline) && (size > 0); cmdline++) size--; /* Test and remove guard. */ if (guard != NULL && guard[0] != '\0') { guard_len = strlen(guard); if (strncasecmp(cmdline, guard, guard_len) != 0) return; cmdline += guard_len; size -= guard_len; } /* Skip leading spaces. */ for (; isspace(*cmdline) && (size > 0); cmdline++) size--; /* Replace ',' with '\0'. */ /* TODO: implement escaping for ',' character. */ cmdline_next = cmdline; while(strsep(&cmdline_next, ",") != NULL) ; init_static_kenv(cmdline, 0); /* Parse boothowto. 
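[Editor's note: fake_preload_metadata() above fabricates the same (tag, length, payload) word chain a real boot loader would hand the kernel. A self-contained walker for that layout, using the MODINFO tag values from sys/linker.h.]

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Tag values as in sys/sys/linker.h. */
#define MODINFO_END	0x0000
#define MODINFO_NAME	0x0001
#define MODINFO_TYPE	0x0002

/* Walk a (tag, length, payload...) chain like the one
 * fake_preload_metadata() builds; payloads are padded to 32-bit words. */
static void
walk_preload(const uint32_t *p)
{
	while (p[0] != MODINFO_END) {
		uint32_t tag = p[0], len = p[1];

		if (tag == MODINFO_NAME || tag == MODINFO_TYPE)
			printf("tag %#x: %s\n", tag, (const char *)&p[2]);
		p += 2 + (len + sizeof(uint32_t) - 1) / sizeof(uint32_t);
	}
}

int
main(void)
{
	uint32_t chain[16];
	int i = 0;

	chain[i++] = MODINFO_NAME;
	chain[i++] = sizeof("kernel");
	memcpy(&chain[i], "kernel", sizeof("kernel"));
	i += 2;			/* "kernel" plus NUL occupies two words */
	chain[i++] = MODINFO_END;
	chain[i++] = 0;
	walk_preload(chain);
	return (0);
}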
*/ for (i = 0; howto_names[i].ev != NULL; i++) { env = kern_getenv(howto_names[i].ev); if (env != NULL) { if (strtoul(env, NULL, 10) != 0) boothowto |= howto_names[i].mask; freeenv(env); } } } vm_offset_t linux_parse_boot_param(struct arm_boot_params *abp) { struct arm_lbabi_tag *walker; uint32_t revision; uint64_t serial; int size; vm_offset_t lastaddr; #ifdef FDT struct fdt_header *dtb_ptr; uint32_t dtb_size; #endif /* * Linux boot ABI: r0 = 0, r1 is the board type (!= 0) and r2 * is atags or dtb pointer. If all of these aren't satisfied, * then punt. Unfortunately, it looks like DT enabled kernels * doesn't uses board type and U-Boot delivers 0 in r1 for them. */ if (abp->abp_r0 != 0 || abp->abp_r2 == 0) return (0); #ifdef FDT /* Test if r2 point to valid DTB. */ dtb_ptr = (struct fdt_header *)abp->abp_r2; if (fdt_check_header(dtb_ptr) == 0) { dtb_size = fdt_totalsize(dtb_ptr); return (fake_preload_metadata(abp, dtb_ptr, dtb_size)); } #endif board_id = abp->abp_r1; walker = (struct arm_lbabi_tag *)abp->abp_r2; if (ATAG_TAG(walker) != ATAG_CORE) return 0; atag_list = walker; while (ATAG_TAG(walker) != ATAG_NONE) { switch (ATAG_TAG(walker)) { case ATAG_CORE: break; case ATAG_MEM: arm_physmem_hardware_region(walker->u.tag_mem.start, walker->u.tag_mem.size); break; case ATAG_INITRD2: break; case ATAG_SERIAL: serial = walker->u.tag_sn.high; serial <<= 32; serial |= walker->u.tag_sn.low; board_set_serial(serial); break; case ATAG_REVISION: revision = walker->u.tag_rev.rev; board_set_revision(revision); break; case ATAG_CMDLINE: size = ATAG_SIZE(walker) - sizeof(struct arm_lbabi_header); size = min(size, LBABI_MAX_COMMAND_LINE); strncpy(linux_command_line, walker->u.tag_cmd.command, size); linux_command_line[size] = '\0'; break; default: break; } walker = ATAG_NEXT(walker); } /* Save a copy for later */ bcopy(atag_list, atags, (char *)walker - (char *)atag_list + ATAG_SIZE(walker)); lastaddr = fake_preload_metadata(abp, NULL, 0); cmdline_set_env(linux_command_line, CMDLINE_GUARD); return lastaddr; } #endif #if defined(FREEBSD_BOOT_LOADER) vm_offset_t freebsd_parse_boot_param(struct arm_boot_params *abp) { vm_offset_t lastaddr = 0; void *mdp; void *kmdp; #ifdef DDB vm_offset_t ksym_start; vm_offset_t ksym_end; #endif /* * Mask metadata pointer: it is supposed to be on page boundary. If * the first argument (mdp) doesn't point to a valid address the * bootloader must have passed us something else than the metadata * ptr, so we give up. Also give up if we cannot find metadta section * the loader creates that we get all this data out of. */ if ((mdp = (void *)(abp->abp_r0 & ~PAGE_MASK)) == NULL) return 0; preload_metadata = mdp; kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) return 0; boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); loader_envp = MD_FETCH(kmdp, MODINFOMD_ENVP, char *); init_static_kenv(loader_envp, 0); lastaddr = MD_FETCH(kmdp, MODINFOMD_KERNEND, vm_offset_t); #ifdef DDB ksym_start = MD_FETCH(kmdp, MODINFOMD_SSYM, uintptr_t); ksym_end = MD_FETCH(kmdp, MODINFOMD_ESYM, uintptr_t); db_fetch_ksymtab(ksym_start, ksym_end); #endif return lastaddr; } #endif vm_offset_t default_parse_boot_param(struct arm_boot_params *abp) { vm_offset_t lastaddr; #if defined(LINUX_BOOT_ABI) if ((lastaddr = linux_parse_boot_param(abp)) != 0) return lastaddr; #endif #if defined(FREEBSD_BOOT_LOADER) if ((lastaddr = freebsd_parse_boot_param(abp)) != 0) return lastaddr; #endif /* Fall back to hardcoded metadata. 
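[Editor's note: linux_parse_boot_param() above advances through the U-Boot ATAG list by each tag's self-reported word count until ATAG_NONE. A simplified standalone model of that walk; the header layout and mem-tag field order follow the Linux boot ABI, but the structures are illustrative stand-ins, not the kernel's arm_lbabi_tag.]

#include <stdint.h>
#include <stdio.h>

#define ATAG_NONE	0x00000000
#define ATAG_CORE	0x54410001
#define ATAG_MEM	0x54410002

struct tag_header_sketch {
	uint32_t size;	/* tag length in 32-bit words, header included */
	uint32_t tag;
};

static void
walk_atags(const uint32_t *p)
{
	const struct tag_header_sketch *h;

	for (h = (const void *)p; h->tag != ATAG_NONE;
	    p += h->size, h = (const void *)p) {
		if (h->tag == ATAG_MEM)
			printf("mem tag: size %#x, start %#x\n", p[2], p[3]);
	}
}

int
main(void)
{
	uint32_t tags[] = {
		2, ATAG_CORE,				/* empty core tag */
		4, ATAG_MEM, 0x10000000, 0x80000000,	/* size, start */
		0, ATAG_NONE,
	};

	walk_atags(tags);
	return (0);
}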
*/ lastaddr = fake_preload_metadata(abp, NULL, 0); return lastaddr; } /* * Stub version of the boot parameter parsing routine. We are * called early in initarm, before even VM has been initialized. * This routine needs to preserve any data that the boot loader * has passed in before the kernel starts to grow past the end * of the BSS, traditionally the place boot-loaders put this data. * * Since this is called so early, things that depend on the vm system * being setup (including access to some SoC's serial ports), about * all that can be done in this routine is to copy the arguments. * * This is the default boot parameter parsing routine. Individual * kernels/boards can override this weak function with one of their * own. We just fake metadata... */ __weak_reference(default_parse_boot_param, parse_boot_param); /* * Initialize proc0 */ void init_proc0(vm_offset_t kstack) { proc_linkup0(&proc0, &thread0); thread0.td_kstack = kstack; thread0.td_pcb = (struct pcb *) (thread0.td_kstack + kstack_pages * PAGE_SIZE) - 1; thread0.td_pcb->pcb_flags = 0; thread0.td_pcb->pcb_vfpcpu = -1; thread0.td_pcb->pcb_vfpstate.fpscr = VFPSCR_DN; thread0.td_frame = &proc0_tf; pcpup->pc_curpcb = thread0.td_pcb; } int arm_predict_branch(void *cookie, u_int insn, register_t pc, register_t *new_pc, u_int (*fetch_reg)(void*, int), u_int (*read_int)(void*, vm_offset_t, u_int*)) { u_int addr, nregs, offset = 0; int error = 0; switch ((insn >> 24) & 0xf) { case 0x2: /* add pc, reg1, #value */ case 0x0: /* add pc, reg1, reg2, lsl #offset */ addr = fetch_reg(cookie, (insn >> 16) & 0xf); if (((insn >> 16) & 0xf) == 15) addr += 8; if (insn & 0x0200000) { offset = (insn >> 7) & 0x1e; offset = (insn & 0xff) << (32 - offset) | (insn & 0xff) >> offset; } else { offset = fetch_reg(cookie, insn & 0x0f); if ((insn & 0x0000ff0) != 0x00000000) { if (insn & 0x10) nregs = fetch_reg(cookie, (insn >> 8) & 0xf); else nregs = (insn >> 7) & 0x1f; switch ((insn >> 5) & 3) { case 0: /* lsl */ offset = offset << nregs; break; case 1: /* lsr */ offset = offset >> nregs; break; default: break; /* XXX */ } } *new_pc = addr + offset; return (0); } case 0xa: /* b ... */ case 0xb: /* bl ... 
*/ addr = ((insn << 2) & 0x03ffffff); if (addr & 0x02000000) addr |= 0xfc000000; *new_pc = (pc + 8 + addr); return (0); case 0x7: /* ldr pc, [pc, reg, lsl #2] */ addr = fetch_reg(cookie, insn & 0xf); addr = pc + 8 + (addr << 2); error = read_int(cookie, addr, &addr); *new_pc = addr; return (error); case 0x1: /* mov pc, reg */ *new_pc = fetch_reg(cookie, insn & 0xf); return (0); case 0x4: case 0x5: /* ldr pc, [reg] */ addr = fetch_reg(cookie, (insn >> 16) & 0xf); /* ldr pc, [reg, #offset] */ if (insn & (1 << 24)) offset = insn & 0xfff; if (insn & 0x00800000) addr += offset; else addr -= offset; error = read_int(cookie, addr, &addr); *new_pc = addr; return (error); case 0x8: /* ldmxx reg, {..., pc} */ case 0x9: addr = fetch_reg(cookie, (insn >> 16) & 0xf); nregs = (insn & 0x5555) + ((insn >> 1) & 0x5555); nregs = (nregs & 0x3333) + ((nregs >> 2) & 0x3333); nregs = (nregs + (nregs >> 4)) & 0x0f0f; nregs = (nregs + (nregs >> 8)) & 0x001f; switch ((insn >> 23) & 0x3) { case 0x0: /* ldmda */ addr = addr - 0; break; case 0x1: /* ldmia */ addr = addr + 0 + ((nregs - 1) << 2); break; case 0x2: /* ldmdb */ addr = addr - 4; break; case 0x3: /* ldmib */ addr = addr + 4 + ((nregs - 1) << 2); break; } error = read_int(cookie, addr, &addr); *new_pc = addr; return (error); default: return (EINVAL); } } #if __ARM_ARCH >= 6 void set_stackptrs(int cpu) { set_stackptr(PSR_IRQ32_MODE, irqstack + ((IRQ_STACK_SIZE * PAGE_SIZE) * (cpu + 1))); set_stackptr(PSR_ABT32_MODE, abtstack + ((ABT_STACK_SIZE * PAGE_SIZE) * (cpu + 1))); set_stackptr(PSR_UND32_MODE, undstack + ((UND_STACK_SIZE * PAGE_SIZE) * (cpu + 1))); } #else void set_stackptrs(int cpu) { set_stackptr(PSR_IRQ32_MODE, irqstack.pv_va + ((IRQ_STACK_SIZE * PAGE_SIZE) * (cpu + 1))); set_stackptr(PSR_ABT32_MODE, abtstack.pv_va + ((ABT_STACK_SIZE * PAGE_SIZE) * (cpu + 1))); set_stackptr(PSR_UND32_MODE, undstack.pv_va + ((UND_STACK_SIZE * PAGE_SIZE) * (cpu + 1))); } #endif #ifdef EFI #define efi_next_descriptor(ptr, size) \ ((struct efi_md *)(((uint8_t *) ptr) + size)) static void add_efi_map_entries(struct efi_map_header *efihdr, struct mem_region *mr, int *mrcnt) { struct efi_md *map, *p; const char *type; size_t efisz, memory_size; int ndesc, i, j; static const char *types[] = { "Reserved", "LoaderCode", "LoaderData", "BootServicesCode", "BootServicesData", "RuntimeServicesCode", "RuntimeServicesData", "ConventionalMemory", "UnusableMemory", "ACPIReclaimMemory", "ACPIMemoryNVS", "MemoryMappedIO", "MemoryMappedIOPortSpace", "PalCode", "PersistentMemory" }; *mrcnt = 0; /* * Memory map data provided by UEFI via the GetMemoryMap * Boot Services API. 
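[Editor's note: the ldm cases above count the set bits of the 16-bit register list with a parallel (SWAR) reduction, because the number of registers loaded before PC determines where PC's new value sits in memory. The same steps, isolated and checked against a known mask.]

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Population count over a 16-bit ldm/stm register list, using the same
 * pairwise-sum steps as arm_predict_branch(). */
static unsigned
reglist_count(uint16_t insn_bits)
{
	unsigned n;

	n = (insn_bits & 0x5555) + ((insn_bits >> 1) & 0x5555); /* 2-bit sums */
	n = (n & 0x3333) + ((n >> 2) & 0x3333);			/* 4-bit sums */
	n = (n + (n >> 4)) & 0x0f0f;				/* 8-bit sums */
	n = (n + (n >> 8)) & 0x001f;				/* final total */
	return (n);
}

int
main(void)
{
	/* ldmia sp!, {r4-r7, pc} -> bits 4,5,6,7,15 -> 5 registers */
	assert(reglist_count(0x80f0) == 5);
	printf("count(0x80f0) = %u\n", reglist_count(0x80f0));
	return (0);
}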
*/ efisz = roundup2(sizeof(struct efi_map_header), 0x10); map = (struct efi_md *)((uint8_t *)efihdr + efisz); if (efihdr->descriptor_size == 0) return; ndesc = efihdr->memory_size / efihdr->descriptor_size; if (boothowto & RB_VERBOSE) printf("%23s %12s %12s %8s %4s\n", "Type", "Physical", "Virtual", "#Pages", "Attr"); memory_size = 0; for (i = 0, j = 0, p = map; i < ndesc; i++, p = efi_next_descriptor(p, efihdr->descriptor_size)) { if (boothowto & RB_VERBOSE) { if (p->md_type < nitems(types)) type = types[p->md_type]; else type = ""; printf("%23s %012llx %12p %08llx ", type, p->md_phys, p->md_virt, p->md_pages); if (p->md_attr & EFI_MD_ATTR_UC) printf("UC "); if (p->md_attr & EFI_MD_ATTR_WC) printf("WC "); if (p->md_attr & EFI_MD_ATTR_WT) printf("WT "); if (p->md_attr & EFI_MD_ATTR_WB) printf("WB "); if (p->md_attr & EFI_MD_ATTR_UCE) printf("UCE "); if (p->md_attr & EFI_MD_ATTR_WP) printf("WP "); if (p->md_attr & EFI_MD_ATTR_RP) printf("RP "); if (p->md_attr & EFI_MD_ATTR_XP) printf("XP "); if (p->md_attr & EFI_MD_ATTR_NV) printf("NV "); if (p->md_attr & EFI_MD_ATTR_MORE_RELIABLE) printf("MORE_RELIABLE "); if (p->md_attr & EFI_MD_ATTR_RO) printf("RO "); if (p->md_attr & EFI_MD_ATTR_RT) printf("RUNTIME"); printf("\n"); } switch (p->md_type) { case EFI_MD_TYPE_CODE: case EFI_MD_TYPE_DATA: case EFI_MD_TYPE_BS_CODE: case EFI_MD_TYPE_BS_DATA: case EFI_MD_TYPE_FREE: /* * We're allowed to use any entry with these types. */ break; default: continue; } j++; if (j >= FDT_MEM_REGIONS) break; mr[j].mr_start = p->md_phys; mr[j].mr_size = p->md_pages * PAGE_SIZE; memory_size += mr[j].mr_size; } *mrcnt = j; } #endif /* EFI */ #ifdef FDT static char * kenv_next(char *cp) { if (cp != NULL) { while (*cp != 0) cp++; cp++; if (*cp == 0) cp = NULL; } return (cp); } static void print_kenv(void) { char *cp; debugf("loader passed (static) kenv:\n"); if (loader_envp == NULL) { debugf(" no env, null ptr\n"); return; } debugf(" loader_envp = 0x%08x\n", (uint32_t)loader_envp); for (cp = loader_envp; cp != NULL; cp = kenv_next(cp)) debugf(" %x %s\n", (uint32_t)cp, cp); } #if __ARM_ARCH < 6 void * initarm(struct arm_boot_params *abp) { struct mem_region mem_regions[FDT_MEM_REGIONS]; struct pv_addr kernel_l1pt; struct pv_addr dpcpu; vm_offset_t dtbp, freemempos, l2_start, lastaddr; uint64_t memsize; uint32_t l2size; char *env; void *kmdp; u_int l1pagetable; int i, j, err_devmap, mem_regions_sz; lastaddr = parse_boot_param(abp); arm_physmem_kernaddr = abp->abp_physaddr; memsize = 0; cpuinfo_init(); set_cpufuncs(); /* * Find the dtb passed in by the boot loader. */ kmdp = preload_search_by_type("elf kernel"); if (kmdp != NULL) dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t); else dtbp = (vm_offset_t)NULL; #if defined(FDT_DTB_STATIC) /* * In case the device tree blob was not retrieved (from metadata) try * to use the statically embedded one. */ if (dtbp == (vm_offset_t)NULL) dtbp = (vm_offset_t)&fdt_static_dtb; #endif if (OF_install(OFW_FDT, 0) == FALSE) panic("Cannot install FDT"); if (OF_init((void *)dtbp) != 0) panic("OF_init failed with the found device tree"); /* Grab physical memory regions information from device tree. */ if (fdt_get_mem_regions(mem_regions, &mem_regions_sz, &memsize) != 0) panic("Cannot get physical memory regions"); arm_physmem_hardware_regions(mem_regions, mem_regions_sz); /* Grab reserved memory regions information from device tree. 
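[Editor's note: add_efi_map_entries() above steps through the UEFI memory map with the firmware-reported descriptor_size rather than sizeof(struct efi_md), since firmware may lay out descriptors with extra padding. A minimal model of that stride; the descriptor type is a simplified stand-in.]

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct efi_md_sketch {		/* simplified stand-in for struct efi_md */
	uint32_t md_type;
	uint64_t md_phys;
	uint64_t md_pages;
};

int
main(void)
{
	/* Pretend the firmware handed back 3 descriptors, each followed by
	 * 8 bytes of vendor padding. */
	size_t desc_size = sizeof(struct efi_md_sketch) + 8;
	size_t ndesc = 3, i;
	uint8_t *map = calloc(ndesc, desc_size);
	uint8_t *p = map;

	for (i = 0; i < ndesc; i++, p += desc_size) {
		/* Advance by desc_size, exactly as efi_next_descriptor(). */
		struct efi_md_sketch *md = (struct efi_md_sketch *)p;

		md->md_phys = i * 0x10000000UL;		/* dummy data */
		md->md_pages = 16;
		printf("desc %zu at phys %#llx, %llu pages\n", i,
		    (unsigned long long)md->md_phys,
		    (unsigned long long)md->md_pages);
	}
	free(map);
	return (0);
}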
*/ if (fdt_get_reserved_regions(mem_regions, &mem_regions_sz) == 0) arm_physmem_exclude_regions(mem_regions, mem_regions_sz, EXFLAG_NODUMP | EXFLAG_NOALLOC); /* Platform-specific initialisation */ platform_probe_and_attach(); pcpu0_init(); /* Do basic tuning, hz etc */ init_param1(); /* Calculate number of L2 tables needed for mapping vm_page_array */ l2size = (memsize / PAGE_SIZE) * sizeof(struct vm_page); l2size = (l2size >> L1_S_SHIFT) + 1; /* * Add one table for end of kernel map, one for stacks, msgbuf and * L1 and L2 tables map and one for vectors map. */ l2size += 3; /* Make it divisible by 4 */ l2size = (l2size + 3) & ~3; freemempos = (lastaddr + PAGE_MASK) & ~PAGE_MASK; /* Define a macro to simplify memory allocation */ #define valloc_pages(var, np) \ alloc_pages((var).pv_va, (np)); \ (var).pv_pa = (var).pv_va + (abp->abp_physaddr - KERNVIRTADDR); #define alloc_pages(var, np) \ (var) = freemempos; \ freemempos += (np * PAGE_SIZE); \ memset((char *)(var), 0, ((np) * PAGE_SIZE)); while (((freemempos - L1_TABLE_SIZE) & (L1_TABLE_SIZE - 1)) != 0) freemempos += PAGE_SIZE; valloc_pages(kernel_l1pt, L1_TABLE_SIZE / PAGE_SIZE); for (i = 0, j = 0; i < l2size; ++i) { if (!(i % (PAGE_SIZE / L2_TABLE_SIZE_REAL))) { valloc_pages(kernel_pt_table[i], L2_TABLE_SIZE / PAGE_SIZE); j = i; } else { kernel_pt_table[i].pv_va = kernel_pt_table[j].pv_va + L2_TABLE_SIZE_REAL * (i - j); kernel_pt_table[i].pv_pa = kernel_pt_table[i].pv_va - KERNVIRTADDR + abp->abp_physaddr; } } /* * Allocate a page for the system page mapped to 0x00000000 * or 0xffff0000. This page will just contain the system vectors * and can be shared by all processes. */ valloc_pages(systempage, 1); /* Allocate dynamic per-cpu area. */ valloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE); dpcpu_init((void *)dpcpu.pv_va, 0); /* Allocate stacks for all modes */ valloc_pages(irqstack, IRQ_STACK_SIZE * MAXCPU); valloc_pages(abtstack, ABT_STACK_SIZE * MAXCPU); valloc_pages(undstack, UND_STACK_SIZE * MAXCPU); valloc_pages(kernelstack, kstack_pages * MAXCPU); valloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE); /* * Now we start construction of the L1 page table * We start by mapping the L2 page tables into the L1. * This means that we can replace L1 mappings later on if necessary */ l1pagetable = kernel_l1pt.pv_va; /* * Try to map as much as possible of kernel text and data using * 1MB section mapping and for the rest of initial kernel address * space use L2 coarse tables. 
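[Editor's note: the L2-table estimate above becomes clearer with a worked number; the 104-byte struct vm_page size assumed here is purely illustrative, and the kernel computes from the real memsize reported by FDT.]

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE	4096
#define L1_S_SHIFT	20	/* 1 MB sections */

int
main(void)
{
	/* Assume 512 MB of RAM and a 104-byte struct vm_page, for
	 * illustration only. */
	uint64_t memsize = 512ULL << 20;
	uint32_t vm_page_sz = 104;
	uint32_t l2size;

	l2size = (memsize / PAGE_SIZE) * vm_page_sz; /* vm_page_array bytes */
	l2size = (l2size >> L1_S_SHIFT) + 1;	/* 1 MB sections to map */
	l2size += 3;		/* kernel end, stacks/msgbuf, vectors */
	l2size = (l2size + 3) & ~3;	/* round up to a multiple of 4 */
	printf("L2 tables needed: %u\n", l2size);
	return (0);
}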
* * Link L2 tables for mapping remainder of kernel (modulo 1MB) * and kernel structures */ l2_start = lastaddr & ~(L1_S_OFFSET); for (i = 0 ; i < l2size - 1; i++) pmap_link_l2pt(l1pagetable, l2_start + i * L1_S_SIZE, &kernel_pt_table[i]); pmap_curmaxkvaddr = l2_start + (l2size - 1) * L1_S_SIZE; /* Map kernel code and data */ pmap_map_chunk(l1pagetable, KERNVIRTADDR, abp->abp_physaddr, (((uint32_t)(lastaddr) - KERNVIRTADDR) + PAGE_MASK) & ~PAGE_MASK, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); /* Map L1 directory and allocated L2 page tables */ pmap_map_chunk(l1pagetable, kernel_l1pt.pv_va, kernel_l1pt.pv_pa, L1_TABLE_SIZE, VM_PROT_READ|VM_PROT_WRITE, PTE_PAGETABLE); pmap_map_chunk(l1pagetable, kernel_pt_table[0].pv_va, kernel_pt_table[0].pv_pa, L2_TABLE_SIZE_REAL * l2size, VM_PROT_READ|VM_PROT_WRITE, PTE_PAGETABLE); /* Map allocated DPCPU, stacks and msgbuf */ pmap_map_chunk(l1pagetable, dpcpu.pv_va, dpcpu.pv_pa, freemempos - dpcpu.pv_va, VM_PROT_READ|VM_PROT_WRITE, PTE_CACHE); /* Link and map the vector page */ pmap_link_l2pt(l1pagetable, ARM_VECTORS_HIGH, &kernel_pt_table[l2size - 1]); pmap_map_entry(l1pagetable, ARM_VECTORS_HIGH, systempage.pv_pa, VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE, PTE_CACHE); /* Establish static device mappings. */ err_devmap = platform_devmap_init(); devmap_bootstrap(l1pagetable, NULL); vm_max_kernel_address = platform_lastaddr(); cpu_domains((DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)) | DOMAIN_CLIENT); pmap_pa = kernel_l1pt.pv_pa; cpu_setttb(kernel_l1pt.pv_pa); cpu_tlb_flushID(); cpu_domains(DOMAIN_CLIENT << (PMAP_DOMAIN_KERNEL * 2)); /* * Now that proper page tables are installed, call cpu_setup() to enable * instruction and data caches and other chip-specific features. */ cpu_setup(); /* * Only after the SOC registers block is mapped we can perform device * tree fixups, as they may attempt to read parameters from hardware. */ OF_interpret("perform-fixup", 0); platform_gpio_init(); cninit(); debugf("initarm: console initialized\n"); debugf(" arg1 kmdp = 0x%08x\n", (uint32_t)kmdp); debugf(" boothowto = 0x%08x\n", boothowto); debugf(" dtbp = 0x%08x\n", (uint32_t)dtbp); print_kenv(); env = kern_getenv("kernelname"); if (env != NULL) { strlcpy(kernelname, env, sizeof(kernelname)); freeenv(env); } if (err_devmap != 0) printf("WARNING: could not fully configure devmap, error=%d\n", err_devmap); platform_late_init(); /* * Pages were allocated during the secondary bootstrap for the * stacks for different CPU modes. * We must now set the r13 registers in the different CPU modes to * point to these stacks. * Since the ARM stacks use STMFD etc. we must set r13 to the top end * of the stack memory. */ cpu_control(CPU_CONTROL_MMU_ENABLE, CPU_CONTROL_MMU_ENABLE); set_stackptrs(0); /* * We must now clean the cache again.... * Cleaning may be done by reading new data to displace any * dirty data in the cache. This will have happened in cpu_setttb() * but since we are boot strapping the addresses used for the read * may have just been remapped and thus the cache could be out * of sync. A re-clean after the switch will cure this. * After booting there are no gross relocations of the kernel thus * this problem will not occur after initarm(). 
*/ cpu_idcache_wbinv_all(); undefined_init(); init_proc0(kernelstack.pv_va); arm_vector_init(ARM_VECTORS_HIGH, ARM_VEC_ALL); pmap_bootstrap(freemempos, &kernel_l1pt); msgbufp = (void *)msgbufpv.pv_va; msgbufinit(msgbufp, msgbufsize); mutex_init(); /* * Exclude the kernel (and all the things we allocated which immediately * follow the kernel) from the VM allocation pool but not from crash * dumps. virtual_avail is a global variable which tracks the kva we've * "allocated" while setting up pmaps. * * Prepare the list of physical memory available to the vm subsystem. */ arm_physmem_exclude_region(abp->abp_physaddr, (virtual_avail - KERNVIRTADDR), EXFLAG_NOALLOC); arm_physmem_init_kernel_globals(); init_param2(physmem); dbg_monitor_init(); kdb_init(); return ((void *)(kernelstack.pv_va + USPACE_SVC_STACK_TOP - sizeof(struct pcb))); } #else /* __ARM_ARCH < 6 */ void * initarm(struct arm_boot_params *abp) { struct mem_region mem_regions[FDT_MEM_REGIONS]; vm_paddr_t lastaddr; vm_offset_t dtbp, kernelstack, dpcpu; char *env; void *kmdp; int err_devmap, mem_regions_sz; #ifdef EFI struct efi_map_header *efihdr; #endif /* get last allocated physical address */ arm_physmem_kernaddr = abp->abp_physaddr; lastaddr = parse_boot_param(abp) - KERNVIRTADDR + arm_physmem_kernaddr; set_cpufuncs(); cpuinfo_init(); /* * Find the dtb passed in by the boot loader. */ kmdp = preload_search_by_type("elf kernel"); dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t); #if defined(FDT_DTB_STATIC) /* * In case the device tree blob was not retrieved (from metadata) try * to use the statically embedded one. */ if (dtbp == (vm_offset_t)NULL) dtbp = (vm_offset_t)&fdt_static_dtb; #endif if (OF_install(OFW_FDT, 0) == FALSE) panic("Cannot install FDT"); if (OF_init((void *)dtbp) != 0) panic("OF_init failed with the found device tree"); #if defined(LINUX_BOOT_ABI) if (loader_envp == NULL && fdt_get_chosen_bootargs(linux_command_line, LBABI_MAX_COMMAND_LINE) == 0) cmdline_set_env(linux_command_line, CMDLINE_GUARD); #endif #ifdef EFI efihdr = (struct efi_map_header *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_EFI_MAP); if (efihdr != NULL) { add_efi_map_entries(efihdr, mem_regions, &mem_regions_sz); } else #endif { /* Grab physical memory regions information from device tree. */ if (fdt_get_mem_regions(mem_regions, &mem_regions_sz,NULL) != 0) panic("Cannot get physical memory regions"); } arm_physmem_hardware_regions(mem_regions, mem_regions_sz); /* Grab reserved memory regions information from device tree. */ if (fdt_get_reserved_regions(mem_regions, &mem_regions_sz) == 0) arm_physmem_exclude_regions(mem_regions, mem_regions_sz, EXFLAG_NODUMP | EXFLAG_NOALLOC); /* * Set TEX remapping registers. * Setup kernel page tables and switch to kernel L1 page table. */ pmap_set_tex(); pmap_bootstrap_prepare(lastaddr); /* * Now that proper page tables are installed, call cpu_setup() to enable * instruction and data caches and other chip-specific features. */ cpu_setup(); /* Platform-specific initialisation */ platform_probe_and_attach(); pcpu0_init(); /* Do basic tuning, hz etc */ init_param1(); /* * Allocate a page for the system page mapped to 0xffff0000 * This page will just contain the system vectors and can be * shared by all processes. */ systempage = pmap_preboot_get_pages(1); /* Map the vector page. */ pmap_preboot_map_pages(systempage, ARM_VECTORS_HIGH, 1); if (virtual_end >= ARM_VECTORS_HIGH) virtual_end = ARM_VECTORS_HIGH - 1; /* Allocate dynamic per-cpu area. 
*/ dpcpu = pmap_preboot_get_vpages(DPCPU_SIZE / PAGE_SIZE); dpcpu_init((void *)dpcpu, 0); /* Allocate stacks for all modes */ irqstack = pmap_preboot_get_vpages(IRQ_STACK_SIZE * MAXCPU); abtstack = pmap_preboot_get_vpages(ABT_STACK_SIZE * MAXCPU); undstack = pmap_preboot_get_vpages(UND_STACK_SIZE * MAXCPU ); kernelstack = pmap_preboot_get_vpages(kstack_pages * MAXCPU); /* Allocate message buffer. */ msgbufp = (void *)pmap_preboot_get_vpages( round_page(msgbufsize) / PAGE_SIZE); /* * Pages were allocated during the secondary bootstrap for the * stacks for different CPU modes. * We must now set the r13 registers in the different CPU modes to * point to these stacks. * Since the ARM stacks use STMFD etc. we must set r13 to the top end * of the stack memory. */ set_stackptrs(0); mutex_init(); /* Establish static device mappings. */ err_devmap = platform_devmap_init(); devmap_bootstrap(0, NULL); vm_max_kernel_address = platform_lastaddr(); /* * Only after the SOC registers block is mapped we can perform device * tree fixups, as they may attempt to read parameters from hardware. */ OF_interpret("perform-fixup", 0); platform_gpio_init(); cninit(); debugf("initarm: console initialized\n"); debugf(" arg1 kmdp = 0x%08x\n", (uint32_t)kmdp); debugf(" boothowto = 0x%08x\n", boothowto); debugf(" dtbp = 0x%08x\n", (uint32_t)dtbp); debugf(" lastaddr1: 0x%08x\n", lastaddr); print_kenv(); env = kern_getenv("kernelname"); if (env != NULL) strlcpy(kernelname, env, sizeof(kernelname)); if (err_devmap != 0) printf("WARNING: could not fully configure devmap, error=%d\n", err_devmap); platform_late_init(); /* * We must now clean the cache again.... * Cleaning may be done by reading new data to displace any * dirty data in the cache. This will have happened in cpu_setttb() * but since we are boot strapping the addresses used for the read * may have just been remapped and thus the cache could be out * of sync. A re-clean after the switch will cure this. * After booting there are no gross relocations of the kernel thus * this problem will not occur after initarm(). */ /* Set stack for exception handlers */ undefined_init(); init_proc0(kernelstack); arm_vector_init(ARM_VECTORS_HIGH, ARM_VEC_ALL); enable_interrupts(PSR_A); pmap_bootstrap(0); /* Exclude the kernel (and all the things we allocated which immediately * follow the kernel) from the VM allocation pool but not from crash * dumps. virtual_avail is a global variable which tracks the kva we've * "allocated" while setting up pmaps. * * Prepare the list of physical memory available to the vm subsystem. */ arm_physmem_exclude_region(abp->abp_physaddr, pmap_preboot_get_pages(0) - abp->abp_physaddr, EXFLAG_NOALLOC); arm_physmem_init_kernel_globals(); init_param2(physmem); /* Init message buffer. */ msgbufinit(msgbufp, msgbufsize); dbg_monitor_init(); kdb_init(); return ((void *)STACKALIGN(thread0.td_pcb)); } #endif /* __ARM_ARCH < 6 */ #endif /* FDT */ - -uint32_t (*arm_cpu_fill_vdso_timehands)(struct vdso_timehands *, - struct timecounter *); - -uint32_t -cpu_fill_vdso_timehands(struct vdso_timehands *vdso_th, struct timecounter *tc) -{ - - return (arm_cpu_fill_vdso_timehands != NULL ? - arm_cpu_fill_vdso_timehands(vdso_th, tc) : 0); -} Index: head/sys/arm/include/md_var.h =================================================================== --- head/sys/arm/include/md_var.h (revision 304284) +++ head/sys/arm/include/md_var.h (revision 304285) @@ -1,81 +1,76 @@ /*- * Copyright (c) 1995 Bruce D. Evans. * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the author nor the names of contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: FreeBSD: src/sys/i386/include/md_var.h,v 1.40 2001/07/12 * $FreeBSD$ */ #ifndef _MACHINE_MD_VAR_H_ #define _MACHINE_MD_VAR_H_ extern long Maxmem; extern char sigcode[]; extern int szsigcode; extern uint32_t *vm_page_dump; extern int vm_page_dump_size; extern int (*_arm_memcpy)(void *, void *, int, int); extern int (*_arm_bzero)(void *, int, int); extern int _min_memcpy_size; extern int _min_bzero_size; -struct vdso_timehands; -struct timecounter; -extern uint32_t (*arm_cpu_fill_vdso_timehands)(struct vdso_timehands *, - struct timecounter *); - #define DST_IS_USER 0x1 #define SRC_IS_USER 0x2 #define IS_PHYSICAL 0x4 enum cpu_class { CPU_CLASS_NONE, CPU_CLASS_ARM9TDMI, CPU_CLASS_ARM9ES, CPU_CLASS_ARM9EJS, CPU_CLASS_ARM10E, CPU_CLASS_ARM10EJ, CPU_CLASS_CORTEXA, CPU_CLASS_KRAIT, CPU_CLASS_XSCALE, CPU_CLASS_ARM11J, CPU_CLASS_MARVELL }; extern enum cpu_class cpu_class; struct dumperinfo; extern int busdma_swi_pending; void busdma_swi(void); void dump_add_page(vm_paddr_t); void dump_drop_page(vm_paddr_t); int minidumpsys(struct dumperinfo *); extern uint32_t initial_fpscr; #endif /* !_MACHINE_MD_VAR_H_ */ Index: head/sys/arm/include/vdso.h =================================================================== --- head/sys/arm/include/vdso.h (revision 304284) +++ head/sys/arm/include/vdso.h (revision 304285) @@ -1,35 +1,37 @@ /*- * Copyright 2012 Konstantin Belousov . * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. 
* * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _ARM_VDSO_H #define _ARM_VDSO_H #define VDSO_TIMEHANDS_MD \ uint32_t th_physical; \ uint32_t th_res[7]; +#define VDSO_TH_ALGO_ARM_GENTIM VDSO_TH_ALGO_1 + #endif Index: head/sys/arm64/arm64/machdep.c =================================================================== --- head/sys/arm64/arm64/machdep.c (revision 304284) +++ head/sys/arm64/arm64/machdep.c (revision 304285) @@ -1,1107 +1,1096 @@ /*- * Copyright (c) 2014 Andrew Turner * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
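[Editor's note: the VDSO_TH_ALGO_ARM_GENTIM tag added to vdso.h above aliases the generic VDSO_TH_ALGO_1 slot from sys/vdso.h; together with the arm_cpu_fill_vdso_timehands hooks deleted from both machdep.c files, this suggests each timehands snapshot is now stamped with the algorithm userland should use. A hedged sketch of the dispatch a libc __vdso_gettc() implementation might perform; the structure and function names are simplified stand-ins, not the actual libc code.]

#include <errno.h>
#include <stdint.h>

#define VDSO_TH_ALGO_1		1	/* mirrors sys/vdso.h */
#define VDSO_TH_ALGO_ARM_GENTIM	VDSO_TH_ALGO_1

struct vdso_th_sketch {		/* simplified stand-in for vdso_timehands */
	uint32_t th_algo;
	uint32_t th_physical;
};

static uint64_t
read_cntvct(void)
{
	/* Illustrative stub: a real implementation reads the ARM generic
	 * timer's counter with the architecture-specific instruction. */
	return (0);
}

static int
vdso_gettc_sketch(const struct vdso_th_sketch *th, uint64_t *tc)
{
	switch (th->th_algo) {
	case VDSO_TH_ALGO_ARM_GENTIM:
		*tc = read_cntvct();	/* fast path, no syscall */
		return (0);
	default:
		return (ENOSYS);	/* caller falls back to a syscall */
	}
}

int
main(void)
{
	struct vdso_th_sketch th = { VDSO_TH_ALGO_ARM_GENTIM, 0 };
	uint64_t tc;

	return (vdso_gettc_sketch(&th, &tc));
}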
* */ #include "opt_acpi.h" #include "opt_platform.h" #include "opt_ddb.h" #include __FBSDID("$FreeBSD$"); #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef VFP #include #endif #ifdef DEV_ACPI #include #include #endif #ifdef FDT #include #include #endif enum arm64_bus arm64_bus_method = ARM64_BUS_NONE; struct pcpu __pcpu[MAXCPU]; static struct trapframe proc0_tf; vm_paddr_t phys_avail[PHYS_AVAIL_SIZE + 2]; vm_paddr_t dump_avail[PHYS_AVAIL_SIZE + 2]; int early_boot = 1; int cold = 1; long realmem = 0; long Maxmem = 0; #define PHYSMAP_SIZE (2 * (VM_PHYSSEG_MAX - 1)) vm_paddr_t physmap[PHYSMAP_SIZE]; u_int physmap_idx; struct kva_md_info kmi; int64_t dcache_line_size; /* The minimum D cache line size */ int64_t icache_line_size; /* The minimum I cache line size */ int64_t idcache_line_size; /* The minimum cache line size */ int64_t dczva_line_size; /* The size of cache line the dc zva zeroes */ /* pagezero_* implementations are provided in support.S */ void pagezero_simple(void *); void pagezero_cache(void *); /* pagezero_simple is default pagezero */ void (*pagezero)(void *p) = pagezero_simple; static void cpu_startup(void *dummy) { identify_cpu(); vm_ksubmap_init(&kmi); bufinit(); vm_pager_bufferinit(); } SYSINIT(cpu, SI_SUB_CPU, SI_ORDER_FIRST, cpu_startup, NULL); int cpu_idle_wakeup(int cpu) { return (0); } int fill_regs(struct thread *td, struct reg *regs) { struct trapframe *frame; frame = td->td_frame; regs->sp = frame->tf_sp; regs->lr = frame->tf_lr; regs->elr = frame->tf_elr; regs->spsr = frame->tf_spsr; memcpy(regs->x, frame->tf_x, sizeof(regs->x)); return (0); } int set_regs(struct thread *td, struct reg *regs) { struct trapframe *frame; frame = td->td_frame; frame->tf_sp = regs->sp; frame->tf_lr = regs->lr; frame->tf_elr = regs->elr; frame->tf_spsr = regs->spsr; memcpy(frame->tf_x, regs->x, sizeof(frame->tf_x)); return (0); } int fill_fpregs(struct thread *td, struct fpreg *regs) { #ifdef VFP struct pcb *pcb; pcb = td->td_pcb; if ((pcb->pcb_fpflags & PCB_FP_STARTED) != 0) { /* * If we have just been running VFP instructions we will * need to save the state to memcpy it below. 
*/ vfp_save_state(td, pcb); memcpy(regs->fp_q, pcb->pcb_vfp, sizeof(regs->fp_q)); regs->fp_cr = pcb->pcb_fpcr; regs->fp_sr = pcb->pcb_fpsr; } else #endif memset(regs->fp_q, 0, sizeof(regs->fp_q)); return (0); } int set_fpregs(struct thread *td, struct fpreg *regs) { #ifdef VFP struct pcb *pcb; pcb = td->td_pcb; memcpy(pcb->pcb_vfp, regs->fp_q, sizeof(regs->fp_q)); pcb->pcb_fpcr = regs->fp_cr; pcb->pcb_fpsr = regs->fp_sr; #endif return (0); } int fill_dbregs(struct thread *td, struct dbreg *regs) { panic("ARM64TODO: fill_dbregs"); } int set_dbregs(struct thread *td, struct dbreg *regs) { panic("ARM64TODO: set_dbregs"); } int ptrace_set_pc(struct thread *td, u_long addr) { panic("ARM64TODO: ptrace_set_pc"); return (0); } int ptrace_single_step(struct thread *td) { td->td_frame->tf_spsr |= PSR_SS; td->td_pcb->pcb_flags |= PCB_SINGLE_STEP; return (0); } int ptrace_clear_single_step(struct thread *td) { td->td_frame->tf_spsr &= ~PSR_SS; td->td_pcb->pcb_flags &= ~PCB_SINGLE_STEP; return (0); } void exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) { struct trapframe *tf = td->td_frame; memset(tf, 0, sizeof(struct trapframe)); /* * We need to set x0 for init as it doesn't call * cpu_set_syscall_retval to copy the value. We also * need to set td_retval for the cases where we do. */ tf->tf_x[0] = td->td_retval[0] = stack; tf->tf_sp = STACKALIGN(stack); tf->tf_lr = imgp->entry_addr; tf->tf_elr = imgp->entry_addr; } /* Sanity check these are the same size, they will be memcpy'd to and fro */ CTASSERT(sizeof(((struct trapframe *)0)->tf_x) == sizeof((struct gpregs *)0)->gp_x); CTASSERT(sizeof(((struct trapframe *)0)->tf_x) == sizeof((struct reg *)0)->x); int get_mcontext(struct thread *td, mcontext_t *mcp, int clear_ret) { struct trapframe *tf = td->td_frame; if (clear_ret & GET_MC_CLEAR_RET) { mcp->mc_gpregs.gp_x[0] = 0; mcp->mc_gpregs.gp_spsr = tf->tf_spsr & ~PSR_C; } else { mcp->mc_gpregs.gp_x[0] = tf->tf_x[0]; mcp->mc_gpregs.gp_spsr = tf->tf_spsr; } memcpy(&mcp->mc_gpregs.gp_x[1], &tf->tf_x[1], sizeof(mcp->mc_gpregs.gp_x[1]) * (nitems(mcp->mc_gpregs.gp_x) - 1)); mcp->mc_gpregs.gp_sp = tf->tf_sp; mcp->mc_gpregs.gp_lr = tf->tf_lr; mcp->mc_gpregs.gp_elr = tf->tf_elr; return (0); } int set_mcontext(struct thread *td, mcontext_t *mcp) { struct trapframe *tf = td->td_frame; memcpy(tf->tf_x, mcp->mc_gpregs.gp_x, sizeof(tf->tf_x)); tf->tf_sp = mcp->mc_gpregs.gp_sp; tf->tf_lr = mcp->mc_gpregs.gp_lr; tf->tf_elr = mcp->mc_gpregs.gp_elr; tf->tf_spsr = mcp->mc_gpregs.gp_spsr; return (0); } static void get_fpcontext(struct thread *td, mcontext_t *mcp) { #ifdef VFP struct pcb *curpcb; critical_enter(); curpcb = curthread->td_pcb; if ((curpcb->pcb_fpflags & PCB_FP_STARTED) != 0) { /* * If we have just been running VFP instructions we will * need to save the state to memcpy it below. */ vfp_save_state(td, curpcb); memcpy(mcp->mc_fpregs.fp_q, curpcb->pcb_vfp, sizeof(mcp->mc_fpregs)); mcp->mc_fpregs.fp_cr = curpcb->pcb_fpcr; mcp->mc_fpregs.fp_sr = curpcb->pcb_fpsr; mcp->mc_fpregs.fp_flags = curpcb->pcb_fpflags; mcp->mc_flags |= _MC_FP_VALID; } critical_exit(); #endif } static void set_fpcontext(struct thread *td, mcontext_t *mcp) { #ifdef VFP struct pcb *curpcb; critical_enter(); if ((mcp->mc_flags & _MC_FP_VALID) != 0) { curpcb = curthread->td_pcb; /* * Discard any vfp state for the current thread, we * are about to override it. 
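[Editor's note: fill_fpregs() and get_fpcontext() above flush VFP hardware state only when PCB_FP_STARTED shows the thread actually executed FP instructions, reporting zeros otherwise. A self-contained model of that save-on-demand flag; the pcb layout is invented for illustration.]

#include <stdio.h>
#include <string.h>

#define PCB_FP_STARTED	0x01	/* thread has executed FP instructions */

struct pcb_sketch {
	int	pcb_fpflags;
	double	pcb_fpregs[4];	/* stand-in for the real VFP bank */
};

static void
fill_fpregs_sketch(const struct pcb_sketch *pcb, double *out, size_t n)
{
	if (pcb->pcb_fpflags & PCB_FP_STARTED) {
		/* Hardware state is live: it would be saved first, then
		 * copied out. */
		memcpy(out, pcb->pcb_fpregs, n * sizeof(double));
	} else {
		/* FPU never used: report clean zeros, skip the save. */
		memset(out, 0, n * sizeof(double));
	}
}

int
main(void)
{
	struct pcb_sketch pcb = { 0, { 1.5, 2.5, 3.5, 4.5 } };
	double regs[4];

	fill_fpregs_sketch(&pcb, regs, 4);
	printf("fp untouched: %g\n", regs[0]);	/* 0 */
	pcb.pcb_fpflags |= PCB_FP_STARTED;
	fill_fpregs_sketch(&pcb, regs, 4);
	printf("fp started:   %g\n", regs[0]);	/* 1.5 */
	return (0);
}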
*/ vfp_discard(td); memcpy(curpcb->pcb_vfp, mcp->mc_fpregs.fp_q, sizeof(mcp->mc_fpregs)); curpcb->pcb_fpcr = mcp->mc_fpregs.fp_cr; curpcb->pcb_fpsr = mcp->mc_fpregs.fp_sr; curpcb->pcb_fpflags = mcp->mc_fpregs.fp_flags; } critical_exit(); #endif } void cpu_idle(int busy) { spinlock_enter(); if (!busy) cpu_idleclock(); if (!sched_runnable()) __asm __volatile( "dsb sy \n" "wfi \n"); if (!busy) cpu_activeclock(); spinlock_exit(); } void cpu_halt(void) { /* We should have shutdown by now, if not enter a low power sleep */ intr_disable(); while (1) { __asm __volatile("wfi"); } } /* * Flush the D-cache for non-DMA I/O so that the I-cache can * be made coherent later. */ void cpu_flush_dcache(void *ptr, size_t len) { /* ARM64TODO TBD */ } /* Get current clock frequency for the given CPU ID. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { panic("ARM64TODO: cpu_est_clockrate"); } void cpu_pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) { pcpu->pc_acpi_id = 0xffffffff; } void spinlock_enter(void) { struct thread *td; register_t daif; td = curthread; if (td->td_md.md_spinlock_count == 0) { daif = intr_disable(); td->td_md.md_spinlock_count = 1; td->td_md.md_saved_daif = daif; } else td->td_md.md_spinlock_count++; critical_enter(); } void spinlock_exit(void) { struct thread *td; register_t daif; td = curthread; critical_exit(); daif = td->td_md.md_saved_daif; td->td_md.md_spinlock_count--; if (td->td_md.md_spinlock_count == 0) intr_restore(daif); } #ifndef _SYS_SYSPROTO_H_ struct sigreturn_args { ucontext_t *ucp; }; #endif int sys_sigreturn(struct thread *td, struct sigreturn_args *uap) { ucontext_t uc; uint32_t spsr; if (uap == NULL) return (EFAULT); if (copyin(uap->sigcntxp, &uc, sizeof(uc))) return (EFAULT); spsr = uc.uc_mcontext.mc_gpregs.gp_spsr; if ((spsr & PSR_M_MASK) != PSR_M_EL0t || (spsr & (PSR_F | PSR_I | PSR_A | PSR_D)) != 0) return (EINVAL); set_mcontext(td, &uc.uc_mcontext); set_fpcontext(td, &uc.uc_mcontext); /* Restore signal mask. */ kern_sigprocmask(td, SIG_SETMASK, &uc.uc_sigmask, NULL, 0); return (EJUSTRETURN); } /* * Construct a PCB from a trapframe. This is called from kdb_trap() where * we want to start a backtrace from the function that caused us to enter * the debugger. We have the context in the trapframe, but base the trace * on the PCB. The PCB doesn't have to be perfect, as long as it contains * enough for a backtrace. */ void makectx(struct trapframe *tf, struct pcb *pcb) { int i; for (i = 0; i < PCB_LR; i++) pcb->pcb_x[i] = tf->tf_x[i]; pcb->pcb_x[PCB_LR] = tf->tf_lr; pcb->pcb_pc = tf->tf_elr; pcb->pcb_sp = tf->tf_sp; } void sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) { struct thread *td; struct proc *p; struct trapframe *tf; struct sigframe *fp, frame; struct sigacts *psp; struct sysentvec *sysent; int code, onstack, sig; td = curthread; p = td->td_proc; PROC_LOCK_ASSERT(p, MA_OWNED); sig = ksi->ksi_signo; code = ksi->ksi_code; psp = p->p_sigacts; mtx_assert(&psp->ps_mtx, MA_OWNED); tf = td->td_frame; onstack = sigonstack(tf->tf_sp); CTR4(KTR_SIG, "sendsig: td=%p (%s) catcher=%p sig=%d", td, p->p_comm, catcher, sig); /* Allocate and validate space for the signal handler context. 
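[Editor's note: spinlock_enter()/spinlock_exit() above save the interrupt state only at nesting depth zero and restore it only when the count returns to zero, so nested critical sections never re-enable interrupts early. A minimal model, with stubs standing in for intr_disable()/intr_restore().]

#include <assert.h>
#include <stdio.h>

static int irqs_enabled = 1;	/* stand-in for the CPU's DAIF/CPSR state */

static int
intr_disable_sketch(void)
{
	int old = irqs_enabled;

	irqs_enabled = 0;
	return (old);
}

static void
intr_restore_sketch(int old)
{
	irqs_enabled = old;
}

struct thread_sketch {
	int	md_spinlock_count;
	int	md_saved_state;
};

static void
spinlock_enter_sketch(struct thread_sketch *td)
{
	if (td->md_spinlock_count == 0)
		td->md_saved_state = intr_disable_sketch(); /* outermost */
	td->md_spinlock_count++;
}

static void
spinlock_exit_sketch(struct thread_sketch *td)
{
	td->md_spinlock_count--;
	if (td->md_spinlock_count == 0)
		intr_restore_sketch(td->md_saved_state);    /* outermost */
}

int
main(void)
{
	struct thread_sketch td = { 0, 0 };

	spinlock_enter_sketch(&td);
	spinlock_enter_sketch(&td);	/* nested: no state change */
	spinlock_exit_sketch(&td);
	assert(irqs_enabled == 0);	/* still inside the outer section */
	spinlock_exit_sketch(&td);
	assert(irqs_enabled == 1);	/* restored exactly once */
	printf("nesting ok\n");
	return (0);
}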
*/ if ((td->td_pflags & TDP_ALTSTACK) != 0 && !onstack && SIGISMEMBER(psp->ps_sigonstack, sig)) { fp = (struct sigframe *)((uintptr_t)td->td_sigstk.ss_sp + td->td_sigstk.ss_size); #if defined(COMPAT_43) td->td_sigstk.ss_flags |= SS_ONSTACK; #endif } else { fp = (struct sigframe *)td->td_frame->tf_sp; } /* Make room, keeping the stack aligned */ fp--; fp = (struct sigframe *)STACKALIGN(fp); /* Fill in the frame to copy out */ get_mcontext(td, &frame.sf_uc.uc_mcontext, 0); get_fpcontext(td, &frame.sf_uc.uc_mcontext); frame.sf_si = ksi->ksi_info; frame.sf_uc.uc_sigmask = *mask; frame.sf_uc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) ? ((onstack) ? SS_ONSTACK : 0) : SS_DISABLE; frame.sf_uc.uc_stack = td->td_sigstk; mtx_unlock(&psp->ps_mtx); PROC_UNLOCK(td->td_proc); /* Copy the sigframe out to the user's stack. */ if (copyout(&frame, fp, sizeof(*fp)) != 0) { /* Process has trashed its stack. Kill it. */ CTR2(KTR_SIG, "sendsig: sigexit td=%p fp=%p", td, fp); PROC_LOCK(p); sigexit(td, SIGILL); } tf->tf_x[0]= sig; tf->tf_x[1] = (register_t)&fp->sf_si; tf->tf_x[2] = (register_t)&fp->sf_uc; tf->tf_elr = (register_t)catcher; tf->tf_sp = (register_t)fp; sysent = p->p_sysent; if (sysent->sv_sigcode_base != 0) tf->tf_lr = (register_t)sysent->sv_sigcode_base; else tf->tf_lr = (register_t)(sysent->sv_psstrings - *(sysent->sv_szsigcode)); CTR3(KTR_SIG, "sendsig: return td=%p pc=%#x sp=%#x", td, tf->tf_elr, tf->tf_sp); PROC_LOCK(p); mtx_lock(&psp->ps_mtx); } static void init_proc0(vm_offset_t kstack) { struct pcpu *pcpup = &__pcpu[0]; proc_linkup0(&proc0, &thread0); thread0.td_kstack = kstack; thread0.td_pcb = (struct pcb *)(thread0.td_kstack) - 1; thread0.td_pcb->pcb_fpflags = 0; thread0.td_pcb->pcb_vfpcpu = UINT_MAX; thread0.td_frame = &proc0_tf; pcpup->pc_curpcb = thread0.td_pcb; } typedef struct { uint32_t type; uint64_t phys_start; uint64_t virt_start; uint64_t num_pages; uint64_t attr; } EFI_MEMORY_DESCRIPTOR; static int add_physmap_entry(uint64_t base, uint64_t length, vm_paddr_t *physmap, u_int *physmap_idxp) { u_int i, insert_idx, _physmap_idx; _physmap_idx = *physmap_idxp; if (length == 0) return (1); /* * Find insertion point while checking for overlap. Start off by * assuming the new entry will be added to the end. */ insert_idx = _physmap_idx; for (i = 0; i <= _physmap_idx; i += 2) { if (base < physmap[i + 1]) { if (base + length <= physmap[i]) { insert_idx = i; break; } if (boothowto & RB_VERBOSE) printf( "Overlapping memory regions, ignoring second region\n"); return (1); } } /* See if we can prepend to the next entry. */ if (insert_idx <= _physmap_idx && base + length == physmap[insert_idx]) { physmap[insert_idx] = base; return (1); } /* See if we can append to the previous entry. */ if (insert_idx > 0 && base == physmap[insert_idx - 1]) { physmap[insert_idx - 1] += length; return (1); } _physmap_idx += 2; *physmap_idxp = _physmap_idx; if (_physmap_idx == PHYSMAP_SIZE) { printf( "Too many segments in the physical address map, giving up\n"); return (0); } /* * Move the last 'N' entries down to make room for the new * entry if needed. */ for (i = _physmap_idx; i > insert_idx; i -= 2) { physmap[i] = physmap[i - 2]; physmap[i + 1] = physmap[i - 1]; } /* Insert the new entry. 
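[Editor's note: add_physmap_entry() above maintains physmap as a flat, sorted array of [start, end) pairs, coalescing ranges that touch. A compact model exercising the append-and-merge case; overlap detection and mid-array insertion are omitted for brevity.]

#include <stdint.h>
#include <stdio.h>

static uint64_t physmap[16];
static unsigned physmap_idx;

/* Simplified model of add_physmap_entry(): append, merging with the
 * previous range when the new one starts where it ended. */
static void
add_range(uint64_t base, uint64_t length)
{
	if (physmap_idx > 0 && physmap[physmap_idx - 1] == base) {
		physmap[physmap_idx - 1] += length;	/* coalesce */
		return;
	}
	physmap[physmap_idx++] = base;
	physmap[physmap_idx++] = base + length;
}

int
main(void)
{
	unsigned i;

	add_range(0x80000000, 0x10000000);
	add_range(0x90000000, 0x10000000);	/* merges with the first */
	add_range(0xc0000000, 0x08000000);	/* separate range */
	for (i = 0; i < physmap_idx; i += 2)
		printf("[%#llx, %#llx)\n", (unsigned long long)physmap[i],
		    (unsigned long long)physmap[i + 1]);
	return (0);
}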
*/ physmap[insert_idx] = base; physmap[insert_idx + 1] = base + length; return (1); } #ifdef FDT static void add_fdt_mem_regions(struct mem_region *mr, int mrcnt, vm_paddr_t *physmap, u_int *physmap_idxp) { for (int i = 0; i < mrcnt; i++) { if (!add_physmap_entry(mr[i].mr_start, mr[i].mr_size, physmap, physmap_idxp)) break; } } #endif #define efi_next_descriptor(ptr, size) \ ((struct efi_md *)(((uint8_t *) ptr) + size)) static void add_efi_map_entries(struct efi_map_header *efihdr, vm_paddr_t *physmap, u_int *physmap_idxp) { struct efi_md *map, *p; const char *type; size_t efisz; int ndesc, i; static const char *types[] = { "Reserved", "LoaderCode", "LoaderData", "BootServicesCode", "BootServicesData", "RuntimeServicesCode", "RuntimeServicesData", "ConventionalMemory", "UnusableMemory", "ACPIReclaimMemory", "ACPIMemoryNVS", "MemoryMappedIO", "MemoryMappedIOPortSpace", "PalCode", "PersistentMemory" }; /* * Memory map data provided by UEFI via the GetMemoryMap * Boot Services API. */ efisz = (sizeof(struct efi_map_header) + 0xf) & ~0xf; map = (struct efi_md *)((uint8_t *)efihdr + efisz); if (efihdr->descriptor_size == 0) return; ndesc = efihdr->memory_size / efihdr->descriptor_size; if (boothowto & RB_VERBOSE) printf("%23s %12s %12s %8s %4s\n", "Type", "Physical", "Virtual", "#Pages", "Attr"); for (i = 0, p = map; i < ndesc; i++, p = efi_next_descriptor(p, efihdr->descriptor_size)) { if (boothowto & RB_VERBOSE) { if (p->md_type < nitems(types)) type = types[p->md_type]; else type = ""; printf("%23s %012lx %12p %08lx ", type, p->md_phys, p->md_virt, p->md_pages); if (p->md_attr & EFI_MD_ATTR_UC) printf("UC "); if (p->md_attr & EFI_MD_ATTR_WC) printf("WC "); if (p->md_attr & EFI_MD_ATTR_WT) printf("WT "); if (p->md_attr & EFI_MD_ATTR_WB) printf("WB "); if (p->md_attr & EFI_MD_ATTR_UCE) printf("UCE "); if (p->md_attr & EFI_MD_ATTR_WP) printf("WP "); if (p->md_attr & EFI_MD_ATTR_RP) printf("RP "); if (p->md_attr & EFI_MD_ATTR_XP) printf("XP "); if (p->md_attr & EFI_MD_ATTR_NV) printf("NV "); if (p->md_attr & EFI_MD_ATTR_MORE_RELIABLE) printf("MORE_RELIABLE "); if (p->md_attr & EFI_MD_ATTR_RO) printf("RO "); if (p->md_attr & EFI_MD_ATTR_RT) printf("RUNTIME"); printf("\n"); } switch (p->md_type) { case EFI_MD_TYPE_CODE: case EFI_MD_TYPE_DATA: case EFI_MD_TYPE_BS_CODE: case EFI_MD_TYPE_BS_DATA: case EFI_MD_TYPE_FREE: /* * We're allowed to use any entry with these types. 
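[Editor's note: the `(sizeof(...) + 0xf) & ~0xf` above is the open-coded form of the roundup2(x, 16) that the arm32 copy of this function uses; a quick exhaustive check of the equivalence for small inputs.]

#include <assert.h>
#include <stddef.h>
#include <stdio.h>

/* roundup2() from sys/param.h, restated: valid for power-of-two 'align'. */
#define ROUNDUP2(x, align)	(((x) + ((align) - 1)) & ~((align) - 1))

int
main(void)
{
	size_t x;

	for (x = 0; x < 64; x++)
		assert(((x + 0xf) & ~(size_t)0xf) == ROUNDUP2(x, (size_t)16));
	printf("(x + 0xf) & ~0xf == roundup2(x, 16) for all tested x\n");
	return (0);
}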
*/ break; default: continue; } if (!add_physmap_entry(p->md_phys, (p->md_pages * PAGE_SIZE), physmap, physmap_idxp)) break; } } #ifdef FDT static void try_load_dtb(caddr_t kmdp) { vm_offset_t dtbp; dtbp = MD_FETCH(kmdp, MODINFOMD_DTBP, vm_offset_t); if (dtbp == (vm_offset_t)NULL) { printf("ERROR loading DTB\n"); return; } if (OF_install(OFW_FDT, 0) == FALSE) panic("Cannot install FDT"); if (OF_init((void *)dtbp) != 0) panic("OF_init failed with the found device tree"); } #endif static bool bus_probe(void) { bool has_acpi, has_fdt; char *order, *env; has_acpi = has_fdt = false; #ifdef FDT has_fdt = (OF_peer(0) != 0); #endif #ifdef DEV_ACPI has_acpi = (acpi_find_table(ACPI_SIG_SPCR) != 0); #endif env = kern_getenv("kern.cfg.order"); if (env != NULL) { order = env; while (order != NULL) { if (has_acpi && strncmp(order, "acpi", 4) == 0 && (order[4] == ',' || order[4] == '\0')) { arm64_bus_method = ARM64_BUS_ACPI; break; } if (has_fdt && strncmp(order, "fdt", 3) == 0 && (order[3] == ',' || order[3] == '\0')) { arm64_bus_method = ARM64_BUS_FDT; break; } order = strchr(order, ','); } freeenv(env); /* If we set the bus method it is valid */ if (arm64_bus_method != ARM64_BUS_NONE) return (true); } /* If no order or an invalid order was set use the default */ if (arm64_bus_method == ARM64_BUS_NONE) { if (has_fdt) arm64_bus_method = ARM64_BUS_FDT; else if (has_acpi) arm64_bus_method = ARM64_BUS_ACPI; } /* * If no option was set the default is valid, otherwise we are * setting one to get cninit() working, then calling panic to tell * the user about the invalid bus setup. */ return (env == NULL); } static void cache_setup(void) { int dcache_line_shift, icache_line_shift, dczva_line_shift; uint32_t ctr_el0; uint32_t dczid_el0; ctr_el0 = READ_SPECIALREG(ctr_el0); /* Read the log2 words in each D cache line */ dcache_line_shift = CTR_DLINE_SIZE(ctr_el0); /* Get the D cache line size */ dcache_line_size = sizeof(int) << dcache_line_shift; /* And the same for the I cache */ icache_line_shift = CTR_ILINE_SIZE(ctr_el0); icache_line_size = sizeof(int) << icache_line_shift; idcache_line_size = MIN(dcache_line_size, icache_line_size); dczid_el0 = READ_SPECIALREG(dczid_el0); /* Check if dc zva is not prohibited */ if (dczid_el0 & DCZID_DZP) dczva_line_size = 0; else { /* Same as with above calculations */ dczva_line_shift = DCZID_BS_SIZE(dczid_el0); dczva_line_size = sizeof(int) << dczva_line_shift; /* Change pagezero function */ pagezero = pagezero_cache; } } void initarm(struct arm64_bootparams *abp) { struct efi_map_header *efihdr; struct pcpu *pcpup; #ifdef FDT struct mem_region mem_regions[FDT_MEM_REGIONS]; int mem_regions_sz; #endif vm_offset_t lastaddr; caddr_t kmdp; vm_paddr_t mem_len; bool valid; int i; /* Set the module data location */ preload_metadata = (caddr_t)(uintptr_t)(abp->modulep); /* Find the kernel address */ kmdp = preload_search_by_type("elf kernel"); if (kmdp == NULL) kmdp = preload_search_by_type("elf64 kernel"); boothowto = MD_FETCH(kmdp, MODINFOMD_HOWTO, int); init_static_kenv(MD_FETCH(kmdp, MODINFOMD_ENVP, char *), 0); #ifdef FDT try_load_dtb(kmdp); #endif /* Find the address to start allocating from */ lastaddr = MD_FETCH(kmdp, MODINFOMD_KERNEND, vm_offset_t); /* Load the physical memory ranges */ physmap_idx = 0; efihdr = (struct efi_map_header *)preload_search_info(kmdp, MODINFO_METADATA | MODINFOMD_EFI_MAP); if (efihdr != NULL) add_efi_map_entries(efihdr, physmap, &physmap_idx); #ifdef FDT else { /* Grab physical memory regions information from device tree. 
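[Editor's note: cache_setup() above converts CTR_EL0's log2-words line-size fields into bytes with sizeof(int) << shift. A standalone worked version; the field extractors restate the architectural layout (IminLine in bits 3:0, DminLine in bits 19:16) and the sample register value is invented.]

#include <stdint.h>
#include <stdio.h>

/* Field extractors mirroring armreg.h: both fields hold log2 of the line
 * size in 4-byte words. */
#define CTR_DLINE_SIZE(ctr)	(((ctr) >> 16) & 0xf)
#define CTR_ILINE_SIZE(ctr)	((ctr) & 0xf)

int
main(void)
{
	uint32_t ctr_el0 = 0x84448004;	/* hypothetical CTR_EL0 value */
	unsigned dline = (unsigned)(sizeof(int) << CTR_DLINE_SIZE(ctr_el0));
	unsigned iline = (unsigned)(sizeof(int) << CTR_ILINE_SIZE(ctr_el0));

	printf("D cache line: %u bytes\n", dline);	/* 4 << 4 = 64 */
	printf("I cache line: %u bytes\n", iline);	/* 4 << 4 = 64 */
	return (0);
}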
*/ if (fdt_get_mem_regions(mem_regions, &mem_regions_sz, NULL) != 0) panic("Cannot get physical memory regions"); add_fdt_mem_regions(mem_regions, mem_regions_sz, physmap, &physmap_idx); } #endif /* Print the memory map */ mem_len = 0; for (i = 0; i < physmap_idx; i += 2) { dump_avail[i] = physmap[i]; dump_avail[i + 1] = physmap[i + 1]; mem_len += physmap[i + 1] - physmap[i]; } dump_avail[i] = 0; dump_avail[i + 1] = 0; /* Set the pcpu data, this is needed by pmap_bootstrap */ pcpup = &__pcpu[0]; pcpu_init(pcpup, 0, sizeof(struct pcpu)); /* * Set the pcpu pointer with a backup in tpidr_el1 to be * loaded when entering the kernel from userland. */ __asm __volatile( "mov x18, %0 \n" "msr tpidr_el1, %0" :: "r"(pcpup)); PCPU_SET(curthread, &thread0); /* Do basic tuning, hz etc */ init_param1(); cache_setup(); /* Bootstrap enough of pmap to enter the kernel proper */ pmap_bootstrap(abp->kern_l0pt, abp->kern_l1pt, KERNBASE - abp->kern_delta, lastaddr - KERNBASE); devmap_bootstrap(0, NULL); valid = bus_probe(); cninit(); if (!valid) panic("Invalid bus configuration: %s", kern_getenv("kern.cfg.order")); init_proc0(abp->kern_stack); msgbufinit(msgbufp, msgbufsize); mutex_init(); init_param2(physmem); dbg_monitor_init(); kdb_init(); early_boot = 0; } -uint32_t (*arm_cpu_fill_vdso_timehands)(struct vdso_timehands *, - struct timecounter *); - -uint32_t -cpu_fill_vdso_timehands(struct vdso_timehands *vdso_th, struct timecounter *tc) -{ - - return (arm_cpu_fill_vdso_timehands != NULL ? - arm_cpu_fill_vdso_timehands(vdso_th, tc) : 0); -} - #ifdef DDB #include DB_SHOW_COMMAND(specialregs, db_show_spregs) { #define PRINT_REG(reg) \ db_printf(__STRING(reg) " = %#016lx\n", READ_SPECIALREG(reg)) PRINT_REG(actlr_el1); PRINT_REG(afsr0_el1); PRINT_REG(afsr1_el1); PRINT_REG(aidr_el1); PRINT_REG(amair_el1); PRINT_REG(ccsidr_el1); PRINT_REG(clidr_el1); PRINT_REG(contextidr_el1); PRINT_REG(cpacr_el1); PRINT_REG(csselr_el1); PRINT_REG(ctr_el0); PRINT_REG(currentel); PRINT_REG(daif); PRINT_REG(dczid_el0); PRINT_REG(elr_el1); PRINT_REG(esr_el1); PRINT_REG(far_el1); #if 0 /* ARM64TODO: Enable VFP before reading floating-point registers */ PRINT_REG(fpcr); PRINT_REG(fpsr); #endif PRINT_REG(id_aa64afr0_el1); PRINT_REG(id_aa64afr1_el1); PRINT_REG(id_aa64dfr0_el1); PRINT_REG(id_aa64dfr1_el1); PRINT_REG(id_aa64isar0_el1); PRINT_REG(id_aa64isar1_el1); PRINT_REG(id_aa64pfr0_el1); PRINT_REG(id_aa64pfr1_el1); PRINT_REG(id_afr0_el1); PRINT_REG(id_dfr0_el1); PRINT_REG(id_isar0_el1); PRINT_REG(id_isar1_el1); PRINT_REG(id_isar2_el1); PRINT_REG(id_isar3_el1); PRINT_REG(id_isar4_el1); PRINT_REG(id_isar5_el1); PRINT_REG(id_mmfr0_el1); PRINT_REG(id_mmfr1_el1); PRINT_REG(id_mmfr2_el1); PRINT_REG(id_mmfr3_el1); #if 0 /* Missing from llvm */ PRINT_REG(id_mmfr4_el1); #endif PRINT_REG(id_pfr0_el1); PRINT_REG(id_pfr1_el1); PRINT_REG(isr_el1); PRINT_REG(mair_el1); PRINT_REG(midr_el1); PRINT_REG(mpidr_el1); PRINT_REG(mvfr0_el1); PRINT_REG(mvfr1_el1); PRINT_REG(mvfr2_el1); PRINT_REG(revidr_el1); PRINT_REG(sctlr_el1); PRINT_REG(sp_el0); PRINT_REG(spsel); PRINT_REG(spsr_el1); PRINT_REG(tcr_el1); PRINT_REG(tpidr_el0); PRINT_REG(tpidr_el1); PRINT_REG(tpidrro_el0); PRINT_REG(ttbr0_el1); PRINT_REG(ttbr1_el1); PRINT_REG(vbar_el1); #undef PRINT_REG } DB_SHOW_COMMAND(vtop, db_show_vtop) { uint64_t phys; if (have_addr) { phys = arm64_address_translate_s1e1r(addr); db_printf("Physical address reg: 0x%016lx\n", phys); } else db_printf("show vtop \n"); } #endif Index: head/sys/arm64/include/md_var.h 
=================================================================== --- head/sys/arm64/include/md_var.h (revision 304284) +++ head/sys/arm64/include/md_var.h (revision 304285) @@ -1,55 +1,50 @@ /*- * Copyright (c) 1995 Bruce D. Evans. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the author nor the names of contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: FreeBSD: src/sys/i386/include/md_var.h,v 1.40 2001/07/12 * $FreeBSD$ */ #ifndef _MACHINE_MD_VAR_H_ #define _MACHINE_MD_VAR_H_ extern long Maxmem; extern char sigcode[]; extern int szsigcode; extern uint64_t *vm_page_dump; extern int vm_page_dump_size; struct dumperinfo; extern int busdma_swi_pending; void busdma_swi(void); void dump_add_page(vm_paddr_t); void dump_drop_page(vm_paddr_t); int minidumpsys(struct dumperinfo *); -struct vdso_timehands; -struct timecounter; -extern uint32_t (*arm_cpu_fill_vdso_timehands)(struct vdso_timehands *, - struct timecounter *); - #endif /* !_MACHINE_MD_VAR_H_ */ Index: head/sys/arm64/include/vdso.h =================================================================== --- head/sys/arm64/include/vdso.h (revision 304284) +++ head/sys/arm64/include/vdso.h (revision 304285) @@ -1,35 +1,37 @@ /*- * Copyright 2012 Konstantin Belousov . * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _MACHINE_VDSO_H_ #define _MACHINE_VDSO_H_ #define VDSO_TIMEHANDS_MD \ uint32_t th_physical; \ uint32_t th_res[7]; +#define VDSO_TH_ALGO_ARM_GENTIM VDSO_TH_ALGO_1 + #endif /* !_MACHINE_VDSO_H_ */ Index: head/sys/dev/acpica/acpi_hpet.c =================================================================== --- head/sys/dev/acpica/acpi_hpet.c (revision 304284) +++ head/sys/dev/acpica/acpi_hpet.c (revision 304285) @@ -1,951 +1,987 @@ /*- * Copyright (c) 2005 Poul-Henning Kamp * Copyright (c) 2010 Alexander Motin * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_acpi.h" +#include "opt_compat.h" + #if defined(__amd64__) #define DEV_APIC #else #include "opt_apic.h" #endif #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #ifdef DEV_APIC #include "pcib_if.h" #endif #define HPET_VENDID_AMD 0x4353 #define HPET_VENDID_AMD2 0x1022 #define HPET_VENDID_INTEL 0x8086 #define HPET_VENDID_NVIDIA 0x10de #define HPET_VENDID_SW 0x1166 ACPI_SERIAL_DECL(hpet, "ACPI HPET support"); static devclass_t hpet_devclass; /* ACPI CA debugging */ #define _COMPONENT ACPI_TIMER ACPI_MODULE_NAME("HPET") struct hpet_softc { device_t dev; int mem_rid; int intr_rid; int irq; int useirq; int legacy_route; int per_cpu; uint32_t allowed_irqs; struct resource *mem_res; struct resource *intr_res; void *intr_handle; ACPI_HANDLE handle; uint32_t acpi_uid; uint64_t freq; uint32_t caps; struct timecounter tc; struct hpet_timer { struct eventtimer et; struct hpet_softc *sc; int num; int mode; int intr_rid; int irq; int pcpu_cpu; int pcpu_misrouted; int pcpu_master; int pcpu_slaves[MAXCPU]; struct resource *intr_res; void *intr_handle; uint32_t caps; uint32_t vectors; uint32_t div; uint32_t next; char name[8]; } t[32]; int num_timers; struct cdev *pdev; int mmap_allow; int mmap_allow_write; }; static d_open_t hpet_open; static d_mmap_t hpet_mmap; static struct cdevsw hpet_cdevsw = { .d_version = D_VERSION, .d_name = "hpet", .d_open = hpet_open, .d_mmap = hpet_mmap, }; static u_int hpet_get_timecount(struct timecounter *tc); static void hpet_test(struct hpet_softc *sc); static char *hpet_ids[] = { "PNP0103", NULL }; /* Knob to disable acpi_hpet device */ bool acpi_hpet_disabled = false; static u_int hpet_get_timecount(struct timecounter *tc) { struct hpet_softc *sc; sc = tc->tc_priv; return (bus_read_4(sc->mem_res, HPET_MAIN_COUNTER)); } +uint32_t +hpet_vdso_timehands(struct vdso_timehands *vdso_th, struct timecounter *tc) +{ + struct hpet_softc *sc; + + sc = tc->tc_priv; + vdso_th->th_algo = VDSO_TH_ALGO_X86_HPET; + vdso_th->th_x86_shift = 0; + vdso_th->th_x86_hpet_idx = device_get_unit(sc->dev); + bzero(vdso_th->th_res, sizeof(vdso_th->th_res)); + return (sc->mmap_allow != 0); +} + +#ifdef COMPAT_FREEBSD32 +uint32_t +hpet_vdso_timehands32(struct vdso_timehands32 *vdso_th32, + struct timecounter *tc) +{ + struct hpet_softc *sc; + + sc = tc->tc_priv; + vdso_th32->th_algo = VDSO_TH_ALGO_X86_HPET; + vdso_th32->th_x86_shift = 0; + vdso_th32->th_x86_hpet_idx = device_get_unit(sc->dev); + bzero(vdso_th32->th_res, sizeof(vdso_th32->th_res)); + return (sc->mmap_allow != 0); +} +#endif + static void hpet_enable(struct hpet_softc *sc) { uint32_t val; val = bus_read_4(sc->mem_res, HPET_CONFIG); if (sc->legacy_route) val |= HPET_CNF_LEG_RT; else val &= ~HPET_CNF_LEG_RT; val |= HPET_CNF_ENABLE; bus_write_4(sc->mem_res, HPET_CONFIG, val); } static void hpet_disable(struct hpet_softc *sc) { uint32_t val; val = bus_read_4(sc->mem_res, HPET_CONFIG); val &= ~HPET_CNF_ENABLE; bus_write_4(sc->mem_res, HPET_CONFIG, val); } static int hpet_start(struct eventtimer *et, sbintime_t first, sbintime_t period) { struct hpet_timer *mt = (struct hpet_timer *)et->et_priv; struct hpet_timer *t; struct hpet_softc *sc = mt->sc; uint32_t fdiv, now; t = (mt->pcpu_master < 0) ? 
mt : &sc->t[mt->pcpu_slaves[curcpu]]; if (period != 0) { t->mode = 1; t->div = (sc->freq * period) >> 32; } else { t->mode = 2; t->div = 0; } if (first != 0) fdiv = (sc->freq * first) >> 32; else fdiv = t->div; if (t->irq < 0) bus_write_4(sc->mem_res, HPET_ISR, 1 << t->num); t->caps |= HPET_TCNF_INT_ENB; now = bus_read_4(sc->mem_res, HPET_MAIN_COUNTER); restart: t->next = now + fdiv; if (t->mode == 1 && (t->caps & HPET_TCAP_PER_INT)) { t->caps |= HPET_TCNF_TYPE; bus_write_4(sc->mem_res, HPET_TIMER_CAP_CNF(t->num), t->caps | HPET_TCNF_VAL_SET); bus_write_4(sc->mem_res, HPET_TIMER_COMPARATOR(t->num), t->next); bus_write_4(sc->mem_res, HPET_TIMER_COMPARATOR(t->num), t->div); } else { t->caps &= ~HPET_TCNF_TYPE; bus_write_4(sc->mem_res, HPET_TIMER_CAP_CNF(t->num), t->caps); bus_write_4(sc->mem_res, HPET_TIMER_COMPARATOR(t->num), t->next); } now = bus_read_4(sc->mem_res, HPET_MAIN_COUNTER); if ((int32_t)(now - t->next + HPET_MIN_CYCLES) >= 0) { fdiv *= 2; goto restart; } return (0); } static int hpet_stop(struct eventtimer *et) { struct hpet_timer *mt = (struct hpet_timer *)et->et_priv; struct hpet_timer *t; struct hpet_softc *sc = mt->sc; t = (mt->pcpu_master < 0) ? mt : &sc->t[mt->pcpu_slaves[curcpu]]; t->mode = 0; t->caps &= ~(HPET_TCNF_INT_ENB | HPET_TCNF_TYPE); bus_write_4(sc->mem_res, HPET_TIMER_CAP_CNF(t->num), t->caps); return (0); } static int hpet_intr_single(void *arg) { struct hpet_timer *t = (struct hpet_timer *)arg; struct hpet_timer *mt; struct hpet_softc *sc = t->sc; uint32_t now; if (t->mode == 0) return (FILTER_STRAY); /* Check that per-CPU timer interrupt reached right CPU. */ if (t->pcpu_cpu >= 0 && t->pcpu_cpu != curcpu) { if ((++t->pcpu_misrouted) % 32 == 0) { printf("HPET interrupt routed to the wrong CPU" " (timer %d CPU %d -> %d)!\n", t->num, t->pcpu_cpu, curcpu); } /* * Reload timer, hoping that next time may be more lucky * (system will manage proper interrupt binding). */ if ((t->mode == 1 && (t->caps & HPET_TCAP_PER_INT) == 0) || t->mode == 2) { t->next = bus_read_4(sc->mem_res, HPET_MAIN_COUNTER) + sc->freq / 8; bus_write_4(sc->mem_res, HPET_TIMER_COMPARATOR(t->num), t->next); } return (FILTER_HANDLED); } if (t->mode == 1 && (t->caps & HPET_TCAP_PER_INT) == 0) { t->next += t->div; now = bus_read_4(sc->mem_res, HPET_MAIN_COUNTER); if ((int32_t)((now + t->div / 2) - t->next) > 0) t->next = now + t->div / 2; bus_write_4(sc->mem_res, HPET_TIMER_COMPARATOR(t->num), t->next); } else if (t->mode == 2) t->mode = 0; mt = (t->pcpu_master < 0) ? 
t : &sc->t[t->pcpu_master]; if (mt->et.et_active) mt->et.et_event_cb(&mt->et, mt->et.et_arg); return (FILTER_HANDLED); } static int hpet_intr(void *arg) { struct hpet_softc *sc = (struct hpet_softc *)arg; int i; uint32_t val; val = bus_read_4(sc->mem_res, HPET_ISR); if (val) { bus_write_4(sc->mem_res, HPET_ISR, val); val &= sc->useirq; for (i = 0; i < sc->num_timers; i++) { if ((val & (1 << i)) == 0) continue; hpet_intr_single(&sc->t[i]); } return (FILTER_HANDLED); } return (FILTER_STRAY); } uint32_t hpet_get_uid(device_t dev) { struct hpet_softc *sc; sc = device_get_softc(dev); return (sc->acpi_uid); } static ACPI_STATUS hpet_find(ACPI_HANDLE handle, UINT32 level, void *context, void **status) { char **ids; uint32_t id = (uint32_t)(uintptr_t)context; uint32_t uid = 0; for (ids = hpet_ids; *ids != NULL; ids++) { if (acpi_MatchHid(handle, *ids)) break; } if (*ids == NULL) return (AE_OK); if (ACPI_FAILURE(acpi_GetInteger(handle, "_UID", &uid)) || id == uid) *status = acpi_get_device(handle); return (AE_OK); } /* * Find an existing IRQ resource that matches the requested IRQ range * and return its RID. If one is not found, use a new RID. */ static int hpet_find_irq_rid(device_t dev, u_long start, u_long end) { rman_res_t irq; int error, rid; for (rid = 0;; rid++) { error = bus_get_resource(dev, SYS_RES_IRQ, rid, &irq, NULL); if (error != 0 || (start <= irq && irq <= end)) return (rid); } } static int hpet_open(struct cdev *cdev, int oflags, int devtype, struct thread *td) { struct hpet_softc *sc; sc = cdev->si_drv1; if (!sc->mmap_allow) return (EPERM); else return (0); } static int hpet_mmap(struct cdev *cdev, vm_ooffset_t offset, vm_paddr_t *paddr, int nprot, vm_memattr_t *memattr) { struct hpet_softc *sc; sc = cdev->si_drv1; if (offset > rman_get_size(sc->mem_res)) return (EINVAL); if (!sc->mmap_allow_write && (nprot & PROT_WRITE)) return (EPERM); *paddr = rman_get_start(sc->mem_res) + offset; *memattr = VM_MEMATTR_UNCACHEABLE; return (0); } /* Discover the HPET via the ACPI table of the same name. */ static void hpet_identify(driver_t *driver, device_t parent) { ACPI_TABLE_HPET *hpet; ACPI_STATUS status; device_t child; int i; /* Only one HPET device can be added. */ if (devclass_get_device(hpet_devclass, 0)) return; for (i = 1; ; i++) { /* Search for HPET table. */ status = AcpiGetTable(ACPI_SIG_HPET, i, (ACPI_TABLE_HEADER **)&hpet); if (ACPI_FAILURE(status)) return; /* Search for HPET device with same ID. */ child = NULL; AcpiWalkNamespace(ACPI_TYPE_DEVICE, ACPI_ROOT_OBJECT, 100, hpet_find, NULL, (void *)(uintptr_t)hpet->Sequence, (void *)&child); /* If found - let it be probed in normal way. */ if (child) { if (bus_get_resource(child, SYS_RES_MEMORY, 0, NULL, NULL) != 0) bus_set_resource(child, SYS_RES_MEMORY, 0, hpet->Address.Address, HPET_MEM_WIDTH); continue; } /* If not - create it from table info. 
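 * The ACPI HPET table supplies the register block address even when
 * the namespace has no matching PNP0103 device, so a child is
 * synthesized here with a SYS_RES_MEMORY resource covering
 * HPET_MEM_WIDTH (0x400) bytes at hpet->Address.Address; it then
 * probes and attaches like a normal hpet device.  (The driver can
 * still be kept out of the way via the acpi_hpet_disabled knob or,
 * presumably, by listing "hpet" in the debug.acpi.disabled
 * environment consulted by acpi_disabled() in hpet_probe().)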
*/ child = BUS_ADD_CHILD(parent, 2, "hpet", 0); if (child == NULL) { printf("%s: can't add child\n", __func__); continue; } bus_set_resource(child, SYS_RES_MEMORY, 0, hpet->Address.Address, HPET_MEM_WIDTH); } } static int hpet_probe(device_t dev) { ACPI_FUNCTION_TRACE((char *)(uintptr_t) __func__); if (acpi_disabled("hpet") || acpi_hpet_disabled) return (ENXIO); if (acpi_get_handle(dev) != NULL && ACPI_ID_PROBE(device_get_parent(dev), dev, hpet_ids) == NULL) return (ENXIO); device_set_desc(dev, "High Precision Event Timer"); return (0); } static int hpet_attach(device_t dev) { struct hpet_softc *sc; struct hpet_timer *t; struct make_dev_args mda; int i, j, num_msi, num_timers, num_percpu_et, num_percpu_t, cur_cpu; int pcpu_master, error; static int maxhpetet = 0; uint32_t val, val2, cvectors, dvectors; uint16_t vendor, rev; ACPI_FUNCTION_TRACE((char *)(uintptr_t) __func__); sc = device_get_softc(dev); sc->dev = dev; sc->handle = acpi_get_handle(dev); sc->mem_rid = 0; sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->mem_rid, RF_ACTIVE); if (sc->mem_res == NULL) return (ENOMEM); /* Validate that we can access the whole region. */ if (rman_get_size(sc->mem_res) < HPET_MEM_WIDTH) { device_printf(dev, "memory region width %jd too small\n", rman_get_size(sc->mem_res)); bus_free_resource(dev, SYS_RES_MEMORY, sc->mem_res); return (ENXIO); } /* Be sure timer is enabled. */ hpet_enable(sc); /* Read basic statistics about the timer. */ val = bus_read_4(sc->mem_res, HPET_PERIOD); if (val == 0) { device_printf(dev, "invalid period\n"); hpet_disable(sc); bus_free_resource(dev, SYS_RES_MEMORY, sc->mem_res); return (ENXIO); } sc->freq = (1000000000000000LL + val / 2) / val; sc->caps = bus_read_4(sc->mem_res, HPET_CAPABILITIES); vendor = (sc->caps & HPET_CAP_VENDOR_ID) >> 16; rev = sc->caps & HPET_CAP_REV_ID; num_timers = 1 + ((sc->caps & HPET_CAP_NUM_TIM) >> 8); /* * ATI/AMD violates IA-PC HPET (High Precision Event Timers) * Specification and provides an off by one number * of timers/comparators. * Additionally, they use unregistered value in VENDOR_ID field. */ if (vendor == HPET_VENDID_AMD && rev < 0x10 && num_timers > 0) num_timers--; sc->num_timers = num_timers; if (bootverbose) { device_printf(dev, "vendor 0x%x, rev 0x%x, %jdHz%s, %d timers,%s\n", vendor, rev, sc->freq, (sc->caps & HPET_CAP_COUNT_SIZE) ? " 64bit" : "", num_timers, (sc->caps & HPET_CAP_LEG_RT) ? " legacy route" : ""); } for (i = 0; i < num_timers; i++) { t = &sc->t[i]; t->sc = sc; t->num = i; t->mode = 0; t->intr_rid = -1; t->irq = -1; t->pcpu_cpu = -1; t->pcpu_misrouted = 0; t->pcpu_master = -1; t->caps = bus_read_4(sc->mem_res, HPET_TIMER_CAP_CNF(i)); t->vectors = bus_read_4(sc->mem_res, HPET_TIMER_CAP_CNF(i) + 4); if (bootverbose) { device_printf(dev, " t%d: irqs 0x%08x (%d)%s%s%s\n", i, t->vectors, (t->caps & HPET_TCNF_INT_ROUTE) >> 9, (t->caps & HPET_TCAP_FSB_INT_DEL) ? ", MSI" : "", (t->caps & HPET_TCAP_SIZE) ? ", 64bit" : "", (t->caps & HPET_TCAP_PER_INT) ? ", periodic" : ""); } } if (testenv("debug.acpi.hpet_test")) hpet_test(sc); /* * Don't attach if the timer never increments. Since the spec * requires it to be at least 10 MHz, it has to change in 1 us. */ val = bus_read_4(sc->mem_res, HPET_MAIN_COUNTER); DELAY(1); val2 = bus_read_4(sc->mem_res, HPET_MAIN_COUNTER); if (val == val2) { device_printf(dev, "HPET never increments, disabling\n"); hpet_disable(sc); bus_free_resource(dev, SYS_RES_MEMORY, sc->mem_res); return (ENXIO); } /* Announce first HPET as timecounter. 
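 * Only unit 0 is registered as a timecounter (quality 950).  The
 * tc_fill_vdso_timehands hooks installed below are what is new here:
 * they let the usermode fast-gettime path read the counter without a
 * syscall, and hpet_vdso_timehands() only reports the mapping as
 * usable while sc->mmap_allow is set.  A rough userland sketch of
 * such a consumer (names assumed, not the actual libc code):
 *
 *   int fd = open("/dev/hpet0", O_RDONLY);
 *   volatile uint8_t *base = mmap(NULL, PAGE_SIZE, PROT_READ,
 *       MAP_SHARED, fd, 0);
 *   // Offset 0xf0 is HPET_MAIN_COUNTER, per acpi_hpet.h below.
 *   uint32_t now = *(volatile uint32_t *)(base + 0xf0);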
*/ if (device_get_unit(dev) == 0) { sc->tc.tc_get_timecount = hpet_get_timecount, sc->tc.tc_counter_mask = ~0u, sc->tc.tc_name = "HPET", sc->tc.tc_quality = 950, sc->tc.tc_frequency = sc->freq; sc->tc.tc_priv = sc; + sc->tc.tc_fill_vdso_timehands = hpet_vdso_timehands; +#ifdef COMPAT_FREEBSD32 + sc->tc.tc_fill_vdso_timehands32 = hpet_vdso_timehands32; +#endif tc_init(&sc->tc); } /* If not disabled - setup and announce event timers. */ if (resource_int_value(device_get_name(dev), device_get_unit(dev), "clock", &i) == 0 && i == 0) return (0); /* Check whether we can and want legacy routing. */ sc->legacy_route = 0; resource_int_value(device_get_name(dev), device_get_unit(dev), "legacy_route", &sc->legacy_route); if ((sc->caps & HPET_CAP_LEG_RT) == 0) sc->legacy_route = 0; if (sc->legacy_route) { sc->t[0].vectors = 0; sc->t[1].vectors = 0; } /* Check what IRQs we want use. */ /* By default allow any PCI IRQs. */ sc->allowed_irqs = 0xffff0000; /* * HPETs in AMD chipsets before SB800 have problems with IRQs >= 16 * Lower are also not always working for different reasons. * SB800 fixed it, but seems do not implements level triggering * properly, that makes it very unreliable - it freezes after any * interrupt loss. Avoid legacy IRQs for AMD. */ if (vendor == HPET_VENDID_AMD || vendor == HPET_VENDID_AMD2) sc->allowed_irqs = 0x00000000; /* * NVidia MCP5x chipsets have number of unexplained interrupt * problems. For some reason, using HPET interrupts breaks HDA sound. */ if (vendor == HPET_VENDID_NVIDIA && rev <= 0x01) sc->allowed_irqs = 0x00000000; /* * ServerWorks HT1000 reported to have problems with IRQs >= 16. * Lower IRQs are working, but allowed mask is not set correctly. * Legacy_route mode works fine. */ if (vendor == HPET_VENDID_SW && rev <= 0x01) sc->allowed_irqs = 0x00000000; /* * Neither QEMU nor VirtualBox report supported IRQs correctly. * The only way to use HPET there is to specify IRQs manually * and/or use legacy_route. Legacy_route mode works on both. */ if (vm_guest) sc->allowed_irqs = 0x00000000; /* Let user override. */ resource_int_value(device_get_name(dev), device_get_unit(dev), "allowed_irqs", &sc->allowed_irqs); /* Get how much per-CPU timers we should try to provide. */ sc->per_cpu = 1; resource_int_value(device_get_name(dev), device_get_unit(dev), "per_cpu", &sc->per_cpu); num_msi = 0; sc->useirq = 0; /* Find IRQ vectors for all timers. */ cvectors = sc->allowed_irqs & 0xffff0000; dvectors = sc->allowed_irqs & 0x0000ffff; if (sc->legacy_route) dvectors &= 0x0000fefe; for (i = 0; i < num_timers; i++) { t = &sc->t[i]; if (sc->legacy_route && i < 2) t->irq = (i == 0) ? 
0 : 8; #ifdef DEV_APIC else if (t->caps & HPET_TCAP_FSB_INT_DEL) { if ((j = PCIB_ALLOC_MSIX( device_get_parent(device_get_parent(dev)), dev, &t->irq))) { device_printf(dev, "Can't allocate interrupt for t%d: %d\n", i, j); } } #endif else if (dvectors & t->vectors) { t->irq = ffs(dvectors & t->vectors) - 1; dvectors &= ~(1 << t->irq); } if (t->irq >= 0) { t->intr_rid = hpet_find_irq_rid(dev, t->irq, t->irq); t->intr_res = bus_alloc_resource(dev, SYS_RES_IRQ, &t->intr_rid, t->irq, t->irq, 1, RF_ACTIVE); if (t->intr_res == NULL) { t->irq = -1; device_printf(dev, "Can't map interrupt for t%d.\n", i); } else if (bus_setup_intr(dev, t->intr_res, INTR_TYPE_CLK, hpet_intr_single, NULL, t, &t->intr_handle) != 0) { t->irq = -1; device_printf(dev, "Can't setup interrupt for t%d.\n", i); } else { bus_describe_intr(dev, t->intr_res, t->intr_handle, "t%d", i); num_msi++; } } if (t->irq < 0 && (cvectors & t->vectors) != 0) { cvectors &= t->vectors; sc->useirq |= (1 << i); } } if (sc->legacy_route && sc->t[0].irq < 0 && sc->t[1].irq < 0) sc->legacy_route = 0; if (sc->legacy_route) hpet_enable(sc); /* Group timers for per-CPU operation. */ num_percpu_et = min(num_msi / mp_ncpus, sc->per_cpu); num_percpu_t = num_percpu_et * mp_ncpus; pcpu_master = 0; cur_cpu = CPU_FIRST(); for (i = 0; i < num_timers; i++) { t = &sc->t[i]; if (t->irq >= 0 && num_percpu_t > 0) { if (cur_cpu == CPU_FIRST()) pcpu_master = i; t->pcpu_cpu = cur_cpu; t->pcpu_master = pcpu_master; sc->t[pcpu_master]. pcpu_slaves[cur_cpu] = i; bus_bind_intr(dev, t->intr_res, cur_cpu); cur_cpu = CPU_NEXT(cur_cpu); num_percpu_t--; } else if (t->irq >= 0) bus_bind_intr(dev, t->intr_res, CPU_FIRST()); } bus_write_4(sc->mem_res, HPET_ISR, 0xffffffff); sc->irq = -1; /* If at least one timer needs legacy IRQ - set it up. */ if (sc->useirq) { j = i = fls(cvectors) - 1; while (j > 0 && (cvectors & (1 << (j - 1))) != 0) j--; sc->intr_rid = hpet_find_irq_rid(dev, j, i); sc->intr_res = bus_alloc_resource(dev, SYS_RES_IRQ, &sc->intr_rid, j, i, 1, RF_SHAREABLE | RF_ACTIVE); if (sc->intr_res == NULL) device_printf(dev, "Can't map interrupt.\n"); else if (bus_setup_intr(dev, sc->intr_res, INTR_TYPE_CLK, hpet_intr, NULL, sc, &sc->intr_handle) != 0) { device_printf(dev, "Can't setup interrupt.\n"); } else { sc->irq = rman_get_start(sc->intr_res); /* Bind IRQ to BSP to avoid live migration. */ bus_bind_intr(dev, sc->intr_res, CPU_FIRST()); } } /* Program and announce event timers. */ for (i = 0; i < num_timers; i++) { t = &sc->t[i]; t->caps &= ~(HPET_TCNF_FSB_EN | HPET_TCNF_INT_ROUTE); t->caps &= ~(HPET_TCNF_VAL_SET | HPET_TCNF_INT_ENB); t->caps &= ~(HPET_TCNF_INT_TYPE); t->caps |= HPET_TCNF_32MODE; if (t->irq >= 0 && sc->legacy_route && i < 2) { /* Legacy route doesn't need more configuration. */ } else #ifdef DEV_APIC if ((t->caps & HPET_TCAP_FSB_INT_DEL) && t->irq >= 0) { uint64_t addr; uint32_t data; if (PCIB_MAP_MSI( device_get_parent(device_get_parent(dev)), dev, t->irq, &addr, &data) == 0) { bus_write_4(sc->mem_res, HPET_TIMER_FSB_ADDR(i), addr); bus_write_4(sc->mem_res, HPET_TIMER_FSB_VAL(i), data); t->caps |= HPET_TCNF_FSB_EN; } else t->irq = -2; } else #endif if (t->irq >= 0) t->caps |= (t->irq << 9); else if (sc->irq >= 0 && (t->vectors & (1 << sc->irq))) t->caps |= (sc->irq << 9) | HPET_TCNF_INT_TYPE; bus_write_4(sc->mem_res, HPET_TIMER_CAP_CNF(i), t->caps); /* Skip event timers without set up IRQ. */ if (t->irq < 0 && (sc->irq < 0 || (t->vectors & (1 << sc->irq)) == 0)) continue; /* Announce the reset. 
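 * The event-timer bounds below use sbintime_t's 32.32 fixed-point
 * format: et_min_period = ((HPET_MIN_CYCLES * 2) << 32) / freq, i.e.
 * a cycle count converted to fractional seconds.  For a typical
 * 14.31818 MHz HPET that is 256 / 14318180 s, roughly 17.9 us;
 * anything shorter risks programming the comparator into the past
 * (see the restart logic in hpet_start() above).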
*/ if (maxhpetet == 0) t->et.et_name = "HPET"; else { sprintf(t->name, "HPET%d", maxhpetet); t->et.et_name = t->name; } t->et.et_flags = ET_FLAGS_PERIODIC | ET_FLAGS_ONESHOT; t->et.et_quality = 450; if (t->pcpu_master >= 0) { t->et.et_flags |= ET_FLAGS_PERCPU; t->et.et_quality += 100; } else if (mp_ncpus >= 8) t->et.et_quality -= 100; if ((t->caps & HPET_TCAP_PER_INT) == 0) t->et.et_quality -= 10; t->et.et_frequency = sc->freq; t->et.et_min_period = ((uint64_t)(HPET_MIN_CYCLES * 2) << 32) / sc->freq; t->et.et_max_period = (0xfffffffeLLU << 32) / sc->freq; t->et.et_start = hpet_start; t->et.et_stop = hpet_stop; t->et.et_priv = &sc->t[i]; if (t->pcpu_master < 0 || t->pcpu_master == i) { et_register(&t->et); maxhpetet++; } } acpi_GetInteger(sc->handle, "_UID", &sc->acpi_uid); make_dev_args_init(&mda); mda.mda_devsw = &hpet_cdevsw; mda.mda_uid = UID_ROOT; mda.mda_gid = GID_WHEEL; mda.mda_mode = 0644; mda.mda_si_drv1 = sc; error = make_dev_s(&mda, &sc->pdev, "hpet%d", device_get_unit(dev)); if (error == 0) { sc->mmap_allow = 1; TUNABLE_INT_FETCH("hw.acpi.hpet.mmap_allow", &sc->mmap_allow); sc->mmap_allow_write = 0; TUNABLE_INT_FETCH("hw.acpi.hpet.mmap_allow_write", &sc->mmap_allow_write); SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "mmap_allow", CTLFLAG_RW, &sc->mmap_allow, 0, "Allow userland to memory map HPET"); SYSCTL_ADD_INT(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO, "mmap_allow_write", CTLFLAG_RW, &sc->mmap_allow_write, 0, "Allow userland write to the HPET register space"); } else { device_printf(dev, "could not create /dev/hpet%d, error %d\n", device_get_unit(dev), error); } return (0); } static int hpet_detach(device_t dev) { ACPI_FUNCTION_TRACE((char *)(uintptr_t) __func__); /* XXX Without a tc_remove() function, we can't detach. */ return (EBUSY); } static int hpet_suspend(device_t dev) { // struct hpet_softc *sc; /* * Disable the timer during suspend. The timer will not lose * its state in S1 or S2, but we are required to disable * it. */ // sc = device_get_softc(dev); // hpet_disable(sc); return (0); } static int hpet_resume(device_t dev) { struct hpet_softc *sc; struct hpet_timer *t; int i; /* Re-enable the timer after a resume to keep the clock advancing. */ sc = device_get_softc(dev); hpet_enable(sc); /* Restart event timers that were running on suspend. */ for (i = 0; i < sc->num_timers; i++) { t = &sc->t[i]; #ifdef DEV_APIC if (t->irq >= 0 && (sc->legacy_route == 0 || i >= 2)) { uint64_t addr; uint32_t data; if (PCIB_MAP_MSI( device_get_parent(device_get_parent(dev)), dev, t->irq, &addr, &data) == 0) { bus_write_4(sc->mem_res, HPET_TIMER_FSB_ADDR(i), addr); bus_write_4(sc->mem_res, HPET_TIMER_FSB_VAL(i), data); } } #endif if (t->mode == 0) continue; t->next = bus_read_4(sc->mem_res, HPET_MAIN_COUNTER); if (t->mode == 1 && (t->caps & HPET_TCAP_PER_INT)) { t->caps |= HPET_TCNF_TYPE; t->next += t->div; bus_write_4(sc->mem_res, HPET_TIMER_CAP_CNF(t->num), t->caps | HPET_TCNF_VAL_SET); bus_write_4(sc->mem_res, HPET_TIMER_COMPARATOR(t->num), t->next); bus_read_4(sc->mem_res, HPET_TIMER_COMPARATOR(t->num)); bus_write_4(sc->mem_res, HPET_TIMER_COMPARATOR(t->num), t->div); } else { t->next += sc->freq / 1024; bus_write_4(sc->mem_res, HPET_TIMER_COMPARATOR(t->num), t->next); } bus_write_4(sc->mem_res, HPET_ISR, 1 << t->num); bus_write_4(sc->mem_res, HPET_TIMER_CAP_CNF(t->num), t->caps); } return (0); } /* Print some basic latency/rate information to assist in debugging. 
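 * hpet_test() primes the timekeeping path with two binuptime() calls,
 * brackets roughly a thousand back-to-back reads of HPET_MAIN_COUNTER
 * with two more, subtracts the measured cost of a single binuptime()
 * call, and prints the counter delta plus the average time per read.
 * It runs only when the tunable checked above is set, e.g. in
 * loader.conf:
 *
 *   debug.acpi.hpet_test="1"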
*/ static void hpet_test(struct hpet_softc *sc) { int i; uint32_t u1, u2; struct bintime b0, b1, b2; struct timespec ts; binuptime(&b0); binuptime(&b0); binuptime(&b1); u1 = bus_read_4(sc->mem_res, HPET_MAIN_COUNTER); for (i = 1; i < 1000; i++) u2 = bus_read_4(sc->mem_res, HPET_MAIN_COUNTER); binuptime(&b2); u2 = bus_read_4(sc->mem_res, HPET_MAIN_COUNTER); bintime_sub(&b2, &b1); bintime_sub(&b1, &b0); bintime_sub(&b2, &b1); bintime2timespec(&b2, &ts); device_printf(sc->dev, "%ld.%09ld: %u ... %u = %u\n", (long)ts.tv_sec, ts.tv_nsec, u1, u2, u2 - u1); device_printf(sc->dev, "time per call: %ld ns\n", ts.tv_nsec / 1000); } #ifdef DEV_APIC static int hpet_remap_intr(device_t dev, device_t child, u_int irq) { struct hpet_softc *sc = device_get_softc(dev); struct hpet_timer *t; uint64_t addr; uint32_t data; int error, i; for (i = 0; i < sc->num_timers; i++) { t = &sc->t[i]; if (t->irq != irq) continue; error = PCIB_MAP_MSI( device_get_parent(device_get_parent(dev)), dev, irq, &addr, &data); if (error) return (error); hpet_disable(sc); /* Stop timer to avoid interrupt loss. */ bus_write_4(sc->mem_res, HPET_TIMER_FSB_ADDR(i), addr); bus_write_4(sc->mem_res, HPET_TIMER_FSB_VAL(i), data); hpet_enable(sc); return (0); } return (ENOENT); } #endif static device_method_t hpet_methods[] = { /* Device interface */ DEVMETHOD(device_identify, hpet_identify), DEVMETHOD(device_probe, hpet_probe), DEVMETHOD(device_attach, hpet_attach), DEVMETHOD(device_detach, hpet_detach), DEVMETHOD(device_suspend, hpet_suspend), DEVMETHOD(device_resume, hpet_resume), #ifdef DEV_APIC DEVMETHOD(bus_remap_intr, hpet_remap_intr), #endif DEVMETHOD_END }; static driver_t hpet_driver = { "hpet", hpet_methods, sizeof(struct hpet_softc), }; DRIVER_MODULE(hpet, acpi, hpet_driver, hpet_devclass, 0, 0); MODULE_DEPEND(hpet, acpi, 1, 1, 1); Index: head/sys/dev/acpica/acpi_hpet.h =================================================================== --- head/sys/dev/acpica/acpi_hpet.h (revision 304284) +++ head/sys/dev/acpica/acpi_hpet.h (revision 304285) @@ -1,67 +1,78 @@ /*- * Copyright (c) 2005 Poul-Henning Kamp * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* * $FreeBSD$ */ #ifndef __ACPI_HPET_H__ #define __ACPI_HPET_H__ #define HPET_MEM_WIDTH 0x400 /* Expected memory region size */ /* General registers */ #define HPET_CAPABILITIES 0x0 /* General capabilities and ID */ #define HPET_CAP_VENDOR_ID 0xffff0000 #define HPET_CAP_LEG_RT 0x00008000 #define HPET_CAP_COUNT_SIZE 0x00002000 /* 1 = 64-bit, 0 = 32-bit */ #define HPET_CAP_NUM_TIM 0x00001f00 #define HPET_CAP_REV_ID 0x000000ff #define HPET_PERIOD 0x4 /* Period (1/hz) of timer */ #define HPET_CONFIG 0x10 /* General configuration register */ #define HPET_CNF_LEG_RT 0x00000002 #define HPET_CNF_ENABLE 0x00000001 #define HPET_ISR 0x20 /* General interrupt status register */ #define HPET_MAIN_COUNTER 0xf0 /* Main counter register */ /* Timer registers */ #define HPET_TIMER_CAP_CNF(x) ((x) * 0x20 + 0x100) #define HPET_TCAP_INT_ROUTE 0xffffffff00000000 #define HPET_TCAP_FSB_INT_DEL 0x00008000 #define HPET_TCNF_FSB_EN 0x00004000 #define HPET_TCNF_INT_ROUTE 0x00003e00 #define HPET_TCNF_32MODE 0x00000100 #define HPET_TCNF_VAL_SET 0x00000040 #define HPET_TCAP_SIZE 0x00000020 /* 1 = 64-bit, 0 = 32-bit */ #define HPET_TCAP_PER_INT 0x00000010 /* Supports periodic interrupts */ #define HPET_TCNF_TYPE 0x00000008 /* 1 = periodic, 0 = one-shot */ #define HPET_TCNF_INT_ENB 0x00000004 #define HPET_TCNF_INT_TYPE 0x00000002 /* 1 = level triggered, 0 = edge */ #define HPET_TIMER_COMPARATOR(x) ((x) * 0x20 + 0x108) #define HPET_TIMER_FSB_VAL(x) ((x) * 0x20 + 0x110) #define HPET_TIMER_FSB_ADDR(x) ((x) * 0x20 + 0x114) #define HPET_MIN_CYCLES 128 /* Period considered reliable. */ +#ifdef _KERNEL +struct timecounter; +struct vdso_timehands; +struct vdso_timehands32; + +uint32_t hpet_vdso_timehands(struct vdso_timehands *vdso_th, + struct timecounter *tc); +uint32_t hpet_vdso_timehands32(struct vdso_timehands32 *vdso_th32, + struct timecounter *tc); +#endif + #endif /* !__ACPI_HPET_H__ */ Index: head/sys/kern/kern_tc.c =================================================================== --- head/sys/kern/kern_tc.c (revision 304284) +++ head/sys/kern/kern_tc.c (revision 304285) @@ -1,2166 +1,2175 @@ /*- * ---------------------------------------------------------------------------- * "THE BEER-WARE LICENSE" (Revision 42): * wrote this file. As long as you retain this notice you * can do whatever you want with this stuff. If we meet some day, and you think * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp * ---------------------------------------------------------------------------- * - * Copyright (c) 2011 The FreeBSD Foundation + * Copyright (c) 2011, 2015, 2016 The FreeBSD Foundation * All rights reserved. * * Portions of this software were developed by Julien Ridoux at the University * of Melbourne under sponsorship from the FreeBSD Foundation. + * + * Portions of this software were developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. */ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_ntp.h" #include "opt_ffclock.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include /* * A large step happens on boot. This constant detects such steps. * It is relatively small so that ntp_update_second gets called enough * in the typical 'missed a couple of seconds' case, but doesn't loop * forever when the time step is large. */ #define LARGE_STEP 200 /* * Implement a dummy timecounter which we can use until we get a real one * in the air. 
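 * (It merely increments a static counter on every read and advertises
 * a nominal 1 MHz frequency with quality -1000000, so it is displaced
 * as soon as any real timecounter registers itself via tc_init().)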
This allows the console and other early stuff to use * time services. */ static u_int dummy_get_timecount(struct timecounter *tc) { static u_int now; return (++now); } static struct timecounter dummy_timecounter = { dummy_get_timecount, 0, ~0u, 1000000, "dummy", -1000000 }; struct timehands { /* These fields must be initialized by the driver. */ struct timecounter *th_counter; int64_t th_adjustment; uint64_t th_scale; u_int th_offset_count; struct bintime th_offset; struct bintime th_bintime; struct timeval th_microtime; struct timespec th_nanotime; struct bintime th_boottime; /* Fields not to be copied in tc_windup start with th_generation. */ u_int th_generation; struct timehands *th_next; }; static struct timehands th0; static struct timehands th1 = { .th_next = &th0 }; static struct timehands th0 = { .th_counter = &dummy_timecounter, .th_scale = (uint64_t)-1 / 1000000, .th_offset = { .sec = 1 }, .th_generation = 1, .th_next = &th1 }; static struct timehands *volatile timehands = &th0; struct timecounter *timecounter = &dummy_timecounter; static struct timecounter *timecounters = &dummy_timecounter; int tc_min_ticktock_freq = 1; volatile time_t time_second = 1; volatile time_t time_uptime = 1; static int sysctl_kern_boottime(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_kern, KERN_BOOTTIME, boottime, CTLTYPE_STRUCT|CTLFLAG_RD, NULL, 0, sysctl_kern_boottime, "S,timeval", "System boottime"); SYSCTL_NODE(_kern, OID_AUTO, timecounter, CTLFLAG_RW, 0, ""); static SYSCTL_NODE(_kern_timecounter, OID_AUTO, tc, CTLFLAG_RW, 0, ""); static int timestepwarnings; SYSCTL_INT(_kern_timecounter, OID_AUTO, stepwarnings, CTLFLAG_RW, ×tepwarnings, 0, "Log time steps"); struct bintime bt_timethreshold; struct bintime bt_tickthreshold; sbintime_t sbt_timethreshold; sbintime_t sbt_tickthreshold; struct bintime tc_tick_bt; sbintime_t tc_tick_sbt; int tc_precexp; int tc_timepercentage = TC_DEFAULTPERC; static int sysctl_kern_timecounter_adjprecision(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_kern_timecounter, OID_AUTO, alloweddeviation, CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0, sysctl_kern_timecounter_adjprecision, "I", "Allowed time interval deviation in percents"); static int tc_chosen; /* Non-zero if a specific tc was chosen via sysctl. */ static void tc_windup(struct bintime *new_boottimebin); static void cpu_tick_calibrate(int); void dtrace_getnanotime(struct timespec *tsp); static int sysctl_kern_boottime(SYSCTL_HANDLER_ARGS) { struct timeval boottime; getboottime(&boottime); #ifndef __mips__ #ifdef SCTL_MASK32 int tv[2]; if (req->flags & SCTL_MASK32) { tv[0] = boottime.tv_sec; tv[1] = boottime.tv_usec; return (SYSCTL_OUT(req, tv, sizeof(tv))); } #endif #endif return (SYSCTL_OUT(req, &boottime, sizeof(boottime))); } static int sysctl_kern_timecounter_get(SYSCTL_HANDLER_ARGS) { u_int ncount; struct timecounter *tc = arg1; ncount = tc->tc_get_timecount(tc); return (sysctl_handle_int(oidp, &ncount, 0, req)); } static int sysctl_kern_timecounter_freq(SYSCTL_HANDLER_ARGS) { uint64_t freq; struct timecounter *tc = arg1; freq = tc->tc_frequency; return (sysctl_handle_64(oidp, &freq, 0, req)); } /* * Return the difference between the timehands' counter value now and what * was when we copied it to the timehands' offset_count. */ static __inline u_int tc_delta(struct timehands *th) { struct timecounter *tc; tc = th->th_counter; return ((tc->tc_get_timecount(tc) - th->th_offset_count) & tc->tc_counter_mask); } /* * Functions for reading the time. 
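 * They all follow the same lockless pattern, sketched here (this is
 * binuptime() below, condensed):
 *
 *   do {
 *           th = timehands;
 *           gen = atomic_load_acq_int(&th->th_generation);
 *           *bt = th->th_offset;
 *           bintime_addx(bt, th->th_scale * tc_delta(th));
 *           atomic_thread_fence_acq();
 *   } while (gen == 0 || gen != th->th_generation);
 *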
We have to loop until we are sure that * the timehands that we operated on was not updated under our feet. See * the comment in for a description of these 12 functions. */ #ifdef FFCLOCK void fbclock_binuptime(struct bintime *bt) { struct timehands *th; unsigned int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); *bt = th->th_offset; bintime_addx(bt, th->th_scale * tc_delta(th)); atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } void fbclock_nanouptime(struct timespec *tsp) { struct bintime bt; fbclock_binuptime(&bt); bintime2timespec(&bt, tsp); } void fbclock_microuptime(struct timeval *tvp) { struct bintime bt; fbclock_binuptime(&bt); bintime2timeval(&bt, tvp); } void fbclock_bintime(struct bintime *bt) { struct timehands *th; unsigned int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); *bt = th->th_bintime; bintime_addx(bt, th->th_scale * tc_delta(th)); atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } void fbclock_nanotime(struct timespec *tsp) { struct bintime bt; fbclock_bintime(&bt); bintime2timespec(&bt, tsp); } void fbclock_microtime(struct timeval *tvp) { struct bintime bt; fbclock_bintime(&bt); bintime2timeval(&bt, tvp); } void fbclock_getbinuptime(struct bintime *bt) { struct timehands *th; unsigned int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); *bt = th->th_offset; atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } void fbclock_getnanouptime(struct timespec *tsp) { struct timehands *th; unsigned int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); bintime2timespec(&th->th_offset, tsp); atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } void fbclock_getmicrouptime(struct timeval *tvp) { struct timehands *th; unsigned int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); bintime2timeval(&th->th_offset, tvp); atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } void fbclock_getbintime(struct bintime *bt) { struct timehands *th; unsigned int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); *bt = th->th_bintime; atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } void fbclock_getnanotime(struct timespec *tsp) { struct timehands *th; unsigned int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); *tsp = th->th_nanotime; atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } void fbclock_getmicrotime(struct timeval *tvp) { struct timehands *th; unsigned int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); *tvp = th->th_microtime; atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } #else /* !FFCLOCK */ void binuptime(struct bintime *bt) { struct timehands *th; u_int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); *bt = th->th_offset; bintime_addx(bt, th->th_scale * tc_delta(th)); atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } void nanouptime(struct timespec *tsp) { struct bintime bt; binuptime(&bt); bintime2timespec(&bt, tsp); } void microuptime(struct timeval *tvp) { struct bintime bt; binuptime(&bt); bintime2timeval(&bt, tvp); } void bintime(struct bintime *bt) { struct timehands *th; u_int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); *bt = th->th_bintime; bintime_addx(bt, th->th_scale * tc_delta(th)); 
atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } void nanotime(struct timespec *tsp) { struct bintime bt; bintime(&bt); bintime2timespec(&bt, tsp); } void microtime(struct timeval *tvp) { struct bintime bt; bintime(&bt); bintime2timeval(&bt, tvp); } void getbinuptime(struct bintime *bt) { struct timehands *th; u_int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); *bt = th->th_offset; atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } void getnanouptime(struct timespec *tsp) { struct timehands *th; u_int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); bintime2timespec(&th->th_offset, tsp); atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } void getmicrouptime(struct timeval *tvp) { struct timehands *th; u_int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); bintime2timeval(&th->th_offset, tvp); atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } void getbintime(struct bintime *bt) { struct timehands *th; u_int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); *bt = th->th_bintime; atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } void getnanotime(struct timespec *tsp) { struct timehands *th; u_int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); *tsp = th->th_nanotime; atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } void getmicrotime(struct timeval *tvp) { struct timehands *th; u_int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); *tvp = th->th_microtime; atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } #endif /* FFCLOCK */ void getboottime(struct timeval *boottime) { struct bintime boottimebin; getboottimebin(&boottimebin); bintime2timeval(&boottimebin, boottime); } void getboottimebin(struct bintime *boottimebin) { struct timehands *th; u_int gen; do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); *boottimebin = th->th_boottime; atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); } #ifdef FFCLOCK /* * Support for feed-forward synchronization algorithms. This is heavily inspired * by the timehands mechanism but kept independent from it. *_windup() functions * have some connection to avoid accessing the timecounter hardware more than * necessary. */ /* Feed-forward clock estimates kept updated by the synchronization daemon. */ struct ffclock_estimate ffclock_estimate; struct bintime ffclock_boottime; /* Feed-forward boot time estimate. */ uint32_t ffclock_status; /* Feed-forward clock status. */ int8_t ffclock_updated; /* New estimates are available. */ struct mtx ffclock_mtx; /* Mutex on ffclock_estimate. 
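 * Writers (e.g. ffclock_reset_clock() below) publish a new estimate
 * under this mutex and then raise ffclock_updated so that the next
 * ffclock_windup() consumes it:
 *
 *   mtx_lock(&ffclock_mtx);
 *   bcopy(&cest, &ffclock_estimate, sizeof(struct ffclock_estimate));
 *   ffclock_updated = INT8_MAX;   // a plain update would increment it
 *   mtx_unlock(&ffclock_mtx);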
*/ struct fftimehands { struct ffclock_estimate cest; struct bintime tick_time; struct bintime tick_time_lerp; ffcounter tick_ffcount; uint64_t period_lerp; volatile uint8_t gen; struct fftimehands *next; }; #define NUM_ELEMENTS(x) (sizeof(x) / sizeof(*x)) static struct fftimehands ffth[10]; static struct fftimehands *volatile fftimehands = ffth; static void ffclock_init(void) { struct fftimehands *cur; struct fftimehands *last; memset(ffth, 0, sizeof(ffth)); last = ffth + NUM_ELEMENTS(ffth) - 1; for (cur = ffth; cur < last; cur++) cur->next = cur + 1; last->next = ffth; ffclock_updated = 0; ffclock_status = FFCLOCK_STA_UNSYNC; mtx_init(&ffclock_mtx, "ffclock lock", NULL, MTX_DEF); } /* * Reset the feed-forward clock estimates. Called from inittodr() to get things * kick started and uses the timecounter nominal frequency as a first period * estimate. Note: this function may be called several time just after boot. * Note: this is the only function that sets the value of boot time for the * monotonic (i.e. uptime) version of the feed-forward clock. */ void ffclock_reset_clock(struct timespec *ts) { struct timecounter *tc; struct ffclock_estimate cest; tc = timehands->th_counter; memset(&cest, 0, sizeof(struct ffclock_estimate)); timespec2bintime(ts, &ffclock_boottime); timespec2bintime(ts, &(cest.update_time)); ffclock_read_counter(&cest.update_ffcount); cest.leapsec_next = 0; cest.period = ((1ULL << 63) / tc->tc_frequency) << 1; cest.errb_abs = 0; cest.errb_rate = 0; cest.status = FFCLOCK_STA_UNSYNC; cest.leapsec_total = 0; cest.leapsec = 0; mtx_lock(&ffclock_mtx); bcopy(&cest, &ffclock_estimate, sizeof(struct ffclock_estimate)); ffclock_updated = INT8_MAX; mtx_unlock(&ffclock_mtx); printf("ffclock reset: %s (%llu Hz), time = %ld.%09lu\n", tc->tc_name, (unsigned long long)tc->tc_frequency, (long)ts->tv_sec, (unsigned long)ts->tv_nsec); } /* * Sub-routine to convert a time interval measured in RAW counter units to time * in seconds stored in bintime format. * NOTE: bintime_mul requires u_int, but the value of the ffcounter may be * larger than the max value of u_int (on 32 bit architecture). Loop to consume * extra cycles. */ static void ffclock_convert_delta(ffcounter ffdelta, uint64_t period, struct bintime *bt) { struct bintime bt2; ffcounter delta, delta_max; delta_max = (1ULL << (8 * sizeof(unsigned int))) - 1; bintime_clear(bt); do { if (ffdelta > delta_max) delta = delta_max; else delta = ffdelta; bt2.sec = 0; bt2.frac = period; bintime_mul(&bt2, (unsigned int)delta); bintime_add(bt, &bt2); ffdelta -= delta; } while (ffdelta > 0); } /* * Update the fftimehands. * Push the tick ffcount and time(s) forward based on current clock estimate. * The conversion from ffcounter to bintime relies on the difference clock * principle, whose accuracy relies on computing small time intervals. If a new * clock estimate has been passed by the synchronisation daemon, make it * current, and compute the linear interpolation for monotonic time if needed. */ static void ffclock_windup(unsigned int delta) { struct ffclock_estimate *cest; struct fftimehands *ffth; struct bintime bt, gap_lerp; ffcounter ffdelta; uint64_t frac; unsigned int polling; uint8_t forward_jump, ogen; /* * Pick the next timehand, copy current ffclock estimates and move tick * times and counter forward. 
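 * The generation discipline mirrors the timehands one: gen is forced
 * to 0 while this entry is being rewritten, and the final increment
 * skips 0 (if (++ogen == 0) ogen = 1), so a reader that observes
 * gen == 0 or a changed gen simply retries.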
*/ forward_jump = 0; ffth = fftimehands->next; ogen = ffth->gen; ffth->gen = 0; cest = &ffth->cest; bcopy(&fftimehands->cest, cest, sizeof(struct ffclock_estimate)); ffdelta = (ffcounter)delta; ffth->period_lerp = fftimehands->period_lerp; ffth->tick_time = fftimehands->tick_time; ffclock_convert_delta(ffdelta, cest->period, &bt); bintime_add(&ffth->tick_time, &bt); ffth->tick_time_lerp = fftimehands->tick_time_lerp; ffclock_convert_delta(ffdelta, ffth->period_lerp, &bt); bintime_add(&ffth->tick_time_lerp, &bt); ffth->tick_ffcount = fftimehands->tick_ffcount + ffdelta; /* * Assess the status of the clock, if the last update is too old, it is * likely the synchronisation daemon is dead and the clock is free * running. */ if (ffclock_updated == 0) { ffdelta = ffth->tick_ffcount - cest->update_ffcount; ffclock_convert_delta(ffdelta, cest->period, &bt); if (bt.sec > 2 * FFCLOCK_SKM_SCALE) ffclock_status |= FFCLOCK_STA_UNSYNC; } /* * If available, grab updated clock estimates and make them current. * Recompute time at this tick using the updated estimates. The clock * estimates passed the feed-forward synchronisation daemon may result * in time conversion that is not monotonically increasing (just after * the update). time_lerp is a particular linear interpolation over the * synchronisation algo polling period that ensures monotonicity for the * clock ids requesting it. */ if (ffclock_updated > 0) { bcopy(&ffclock_estimate, cest, sizeof(struct ffclock_estimate)); ffdelta = ffth->tick_ffcount - cest->update_ffcount; ffth->tick_time = cest->update_time; ffclock_convert_delta(ffdelta, cest->period, &bt); bintime_add(&ffth->tick_time, &bt); /* ffclock_reset sets ffclock_updated to INT8_MAX */ if (ffclock_updated == INT8_MAX) ffth->tick_time_lerp = ffth->tick_time; if (bintime_cmp(&ffth->tick_time, &ffth->tick_time_lerp, >)) forward_jump = 1; else forward_jump = 0; bintime_clear(&gap_lerp); if (forward_jump) { gap_lerp = ffth->tick_time; bintime_sub(&gap_lerp, &ffth->tick_time_lerp); } else { gap_lerp = ffth->tick_time_lerp; bintime_sub(&gap_lerp, &ffth->tick_time); } /* * The reset from the RTC clock may be far from accurate, and * reducing the gap between real time and interpolated time * could take a very long time if the interpolated clock insists * on strict monotonicity. The clock is reset under very strict * conditions (kernel time is known to be wrong and * synchronization daemon has been restarted recently. * ffclock_boottime absorbs the jump to ensure boot time is * correct and uptime functions stay consistent. */ if (((ffclock_status & FFCLOCK_STA_UNSYNC) == FFCLOCK_STA_UNSYNC) && ((cest->status & FFCLOCK_STA_UNSYNC) == 0) && ((cest->status & FFCLOCK_STA_WARMUP) == FFCLOCK_STA_WARMUP)) { if (forward_jump) bintime_add(&ffclock_boottime, &gap_lerp); else bintime_sub(&ffclock_boottime, &gap_lerp); ffth->tick_time_lerp = ffth->tick_time; bintime_clear(&gap_lerp); } ffclock_status = cest->status; ffth->period_lerp = cest->period; /* * Compute corrected period used for the linear interpolation of * time. The rate of linear interpolation is capped to 5000PPM * (5ms/s). 
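 * (In the code below, 18446744073 is 2^64 / 10^9, one nanosecond in
 * bintime frac units, so bt.frac = 5000000 * 18446744073 amounts to
 * 5 ms; multiplied by the polling interval in seconds it caps the gap
 * slewed per update, e.g. at most 80 ms for a 16 s polling period.)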
*/ if (bintime_isset(&gap_lerp)) { ffdelta = cest->update_ffcount; ffdelta -= fftimehands->cest.update_ffcount; ffclock_convert_delta(ffdelta, cest->period, &bt); polling = bt.sec; bt.sec = 0; bt.frac = 5000000 * (uint64_t)18446744073LL; bintime_mul(&bt, polling); if (bintime_cmp(&gap_lerp, &bt, >)) gap_lerp = bt; /* Approximate 1 sec by 1-(1/2^64) to ease arithmetic */ frac = 0; if (gap_lerp.sec > 0) { frac -= 1; frac /= ffdelta / gap_lerp.sec; } frac += gap_lerp.frac / ffdelta; if (forward_jump) ffth->period_lerp += frac; else ffth->period_lerp -= frac; } ffclock_updated = 0; } if (++ogen == 0) ogen = 1; ffth->gen = ogen; fftimehands = ffth; } /* * Adjust the fftimehands when the timecounter is changed. Stating the obvious, * the old and new hardware counter cannot be read simultaneously. tc_windup() * does read the two counters 'back to back', but a few cycles are effectively * lost, and not accumulated in tick_ffcount. This is a fairly radical * operation for a feed-forward synchronization daemon, and it is its job to not * pushing irrelevant data to the kernel. Because there is no locking here, * simply force to ignore pending or next update to give daemon a chance to * realize the counter has changed. */ static void ffclock_change_tc(struct timehands *th) { struct fftimehands *ffth; struct ffclock_estimate *cest; struct timecounter *tc; uint8_t ogen; tc = th->th_counter; ffth = fftimehands->next; ogen = ffth->gen; ffth->gen = 0; cest = &ffth->cest; bcopy(&(fftimehands->cest), cest, sizeof(struct ffclock_estimate)); cest->period = ((1ULL << 63) / tc->tc_frequency ) << 1; cest->errb_abs = 0; cest->errb_rate = 0; cest->status |= FFCLOCK_STA_UNSYNC; ffth->tick_ffcount = fftimehands->tick_ffcount; ffth->tick_time_lerp = fftimehands->tick_time_lerp; ffth->tick_time = fftimehands->tick_time; ffth->period_lerp = cest->period; /* Do not lock but ignore next update from synchronization daemon. */ ffclock_updated--; if (++ogen == 0) ogen = 1; ffth->gen = ogen; fftimehands = ffth; } /* * Retrieve feed-forward counter and time of last kernel tick. */ void ffclock_last_tick(ffcounter *ffcount, struct bintime *bt, uint32_t flags) { struct fftimehands *ffth; uint8_t gen; /* * No locking but check generation has not changed. Also need to make * sure ffdelta is positive, i.e. ffcount > tick_ffcount. */ do { ffth = fftimehands; gen = ffth->gen; if ((flags & FFCLOCK_LERP) == FFCLOCK_LERP) *bt = ffth->tick_time_lerp; else *bt = ffth->tick_time; *ffcount = ffth->tick_ffcount; } while (gen == 0 || gen != ffth->gen); } /* * Absolute clock conversion. Low level function to convert ffcounter to * bintime. The ffcounter is converted using the current ffclock period estimate * or the "interpolated period" to ensure monotonicity. * NOTE: this conversion may have been deferred, and the clock updated since the * hardware counter has been read. */ void ffclock_convert_abs(ffcounter ffcount, struct bintime *bt, uint32_t flags) { struct fftimehands *ffth; struct bintime bt2; ffcounter ffdelta; uint8_t gen; /* * No locking but check generation has not changed. Also need to make * sure ffdelta is positive, i.e. ffcount > tick_ffcount. 
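 * (In practice the code below keeps ffdelta positive by taking the
 * magnitude of the difference and then adding or subtracting the
 * converted interval, depending on whether ffcount lies after or
 * before the tick.)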
*/ do { ffth = fftimehands; gen = ffth->gen;
if (ffcount > ffth->tick_ffcount) ffdelta = ffcount - ffth->tick_ffcount; else ffdelta = ffth->tick_ffcount - ffcount;
if ((flags & FFCLOCK_LERP) == FFCLOCK_LERP) { *bt = ffth->tick_time_lerp; ffclock_convert_delta(ffdelta, ffth->period_lerp, &bt2); } else { *bt = ffth->tick_time; ffclock_convert_delta(ffdelta, ffth->cest.period, &bt2); }
if (ffcount > ffth->tick_ffcount) bintime_add(bt, &bt2); else bintime_sub(bt, &bt2);
} while (gen == 0 || gen != ffth->gen); }
/* * Difference clock conversion. * Low level function to convert a time interval measured in RAW counter units * into bintime. The difference clock allows measuring small intervals much more * reliably than the absolute clock. */
void ffclock_convert_diff(ffcounter ffdelta, struct bintime *bt) { struct fftimehands *ffth; uint8_t gen;
/* No locking but check generation has not changed. */
do { ffth = fftimehands; gen = ffth->gen; ffclock_convert_delta(ffdelta, ffth->cest.period, bt); } while (gen == 0 || gen != ffth->gen); }
/* * Access to current ffcounter value. */
void ffclock_read_counter(ffcounter *ffcount) { struct timehands *th; struct fftimehands *ffth; unsigned int gen, delta;
/* * ffclock_windup() is called from tc_windup(), so it is safe to rely on * th->th_generation only, for correct delta and ffcounter. */
do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); ffth = fftimehands; delta = tc_delta(th); *ffcount = ffth->tick_ffcount; atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation);
*ffcount += delta; }
void binuptime(struct bintime *bt) { binuptime_fromclock(bt, sysclock_active); }
void nanouptime(struct timespec *tsp) { nanouptime_fromclock(tsp, sysclock_active); }
void microuptime(struct timeval *tvp) { microuptime_fromclock(tvp, sysclock_active); }
void bintime(struct bintime *bt) { bintime_fromclock(bt, sysclock_active); }
void nanotime(struct timespec *tsp) { nanotime_fromclock(tsp, sysclock_active); }
void microtime(struct timeval *tvp) { microtime_fromclock(tvp, sysclock_active); }
void getbinuptime(struct bintime *bt) { getbinuptime_fromclock(bt, sysclock_active); }
void getnanouptime(struct timespec *tsp) { getnanouptime_fromclock(tsp, sysclock_active); }
void getmicrouptime(struct timeval *tvp) { getmicrouptime_fromclock(tvp, sysclock_active); }
void getbintime(struct bintime *bt) { getbintime_fromclock(bt, sysclock_active); }
void getnanotime(struct timespec *tsp) { getnanotime_fromclock(tsp, sysclock_active); }
void getmicrotime(struct timeval *tvp) { getmicrotime_fromclock(tvp, sysclock_active); }
#endif /* FFCLOCK */
/* * This is a clone of getnanotime and used for walltimestamps. * The dtrace_ prefix prevents fbt from creating probes for * it so walltimestamp can be safely used in all fbt probes. */
void dtrace_getnanotime(struct timespec *tsp) { struct timehands *th; u_int gen;
do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); *tsp = th->th_nanotime; atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation); }
/* * System clock currently providing time to the system. Modifiable via sysctl * when the FFCLOCK option is defined. */
int sysclock_active = SYSCLOCK_FBCK;
/* Internal NTP status and error estimates. */
extern int time_status; extern long time_esterror;
/* * Take a snapshot of sysclock data which can be used to compare system clocks * and generate timestamps after the fact.
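 *
 * A minimal usage sketch for the snapshot API (editorial
 * illustration, assuming a kernel consumer; not part of the original
 * source): capture once, then derive timestamps later from either
 * system clock:
 *
 *	struct sysclock_snap snap;
 *	struct bintime bt;
 *
 *	sysclock_getsnapshot(&snap, 0);	// !fast: also capture delta
 *	// ... later, possibly long after the event ...
 *	(void)sysclock_snap2bintime(&snap, &bt, SYSCLOCK_FBCK, 0);
 *	#ifdef FFCLOCK
 *	(void)sysclock_snap2bintime(&snap, &bt, SYSCLOCK_FFWD,
 *	    FFCLOCK_LERP | FFCLOCK_LEAPSEC);
 *	#endif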
*/ void sysclock_getsnapshot(struct sysclock_snap *clock_snap, int fast) { struct fbclock_info *fbi; struct timehands *th; struct bintime bt; unsigned int delta, gen;
#ifdef FFCLOCK
ffcounter ffcount; struct fftimehands *ffth; struct ffclock_info *ffi; struct ffclock_estimate cest;
ffi = &clock_snap->ff_info;
#endif
fbi = &clock_snap->fb_info; delta = 0;
do { th = timehands; gen = atomic_load_acq_int(&th->th_generation); fbi->th_scale = th->th_scale; fbi->tick_time = th->th_offset;
#ifdef FFCLOCK
ffth = fftimehands; ffi->tick_time = ffth->tick_time; ffi->tick_time_lerp = ffth->tick_time_lerp; ffi->period = ffth->cest.period; ffi->period_lerp = ffth->period_lerp; clock_snap->ffcount = ffth->tick_ffcount; cest = ffth->cest;
#endif
if (!fast) delta = tc_delta(th); atomic_thread_fence_acq(); } while (gen == 0 || gen != th->th_generation);
clock_snap->delta = delta; clock_snap->sysclock_active = sysclock_active;
/* Record feedback clock status and error. */
clock_snap->fb_info.status = time_status;
/* XXX: Very crude estimate of feedback clock error. */
bt.sec = time_esterror / 1000000; bt.frac = ((time_esterror - bt.sec) * 1000000) * (uint64_t)18446744073709ULL; clock_snap->fb_info.error = bt;
#ifdef FFCLOCK
if (!fast) clock_snap->ffcount += delta;
/* Record feed-forward clock leap second adjustment. */
ffi->leapsec_adjustment = cest.leapsec_total; if (clock_snap->ffcount > cest.leapsec_next) ffi->leapsec_adjustment -= cest.leapsec;
/* Record feed-forward clock status and error. */
clock_snap->ff_info.status = cest.status; ffcount = clock_snap->ffcount - cest.update_ffcount; ffclock_convert_delta(ffcount, cest.period, &bt);
/* 18446744073709 = int(2^64/1e12), err_bound_rate in [ps/s]. */
bintime_mul(&bt, cest.errb_rate * (uint64_t)18446744073709ULL);
/* 18446744073 = int(2^64 / 1e9), since err_abs in [ns]. */
bintime_addx(&bt, cest.errb_abs * (uint64_t)18446744073ULL); clock_snap->ff_info.error = bt;
#endif
}
/* * Convert a sysclock snapshot into a struct bintime based on the specified * clock source and flags. */
int sysclock_snap2bintime(struct sysclock_snap *cs, struct bintime *bt, int whichclock, uint32_t flags) { struct bintime boottimebin;
#ifdef FFCLOCK
struct bintime bt2; uint64_t period;
#endif
switch (whichclock) { case SYSCLOCK_FBCK: *bt = cs->fb_info.tick_time;
/* If snapshot was created with !fast, delta will be >0. */
if (cs->delta > 0) bintime_addx(bt, cs->fb_info.th_scale * cs->delta);
if ((flags & FBCLOCK_UPTIME) == 0) { getboottimebin(&boottimebin); bintime_add(bt, &boottimebin); } break;
#ifdef FFCLOCK
case SYSCLOCK_FFWD: if (flags & FFCLOCK_LERP) { *bt = cs->ff_info.tick_time_lerp; period = cs->ff_info.period_lerp; } else { *bt = cs->ff_info.tick_time; period = cs->ff_info.period; }
/* If snapshot was created with !fast, delta will be >0. */
if (cs->delta > 0) { ffclock_convert_delta(cs->delta, period, &bt2); bintime_add(bt, &bt2); }
/* Leap second adjustment. */
if (flags & FFCLOCK_LEAPSEC) bt->sec -= cs->ff_info.leapsec_adjustment;
/* Boot time adjustment, for uptime/monotonic clocks. */
if (flags & FFCLOCK_UPTIME) bintime_sub(bt, &ffclock_boottime); break;
#endif
default: return (EINVAL); break; }
return (0); }
/* * Initialize a new timecounter and possibly use it.
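 *
 * A minimal registration sketch (editorial illustration; "foo" and
 * its softc are hypothetical, not part of the original source):
 *
 *	static u_int
 *	foo_get_timecount(struct timecounter *tc)
 *	{
 *		struct foo_softc *sc = tc->tc_priv;
 *
 *		return (bus_read_4(sc->sc_res, FOO_COUNT_REG));
 *	}
 *
 *	static struct timecounter foo_timecounter = {
 *		.tc_get_timecount = foo_get_timecount,
 *		.tc_counter_mask = 0xffffffffu,	// 32 implemented bits
 *		.tc_name = "foo",
 *		.tc_quality = 800,
 *	};
 *
 *	// at attach time, once the frequency is known:
 *	foo_timecounter.tc_frequency = sc->sc_freq;
 *	foo_timecounter.tc_priv = sc;
 *	tc_init(&foo_timecounter);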
*/ void tc_init(struct timecounter *tc) { u_int u; struct sysctl_oid *tc_root; u = tc->tc_frequency / tc->tc_counter_mask; /* XXX: We need some margin here, 10% is a guess */ u *= 11; u /= 10; if (u > hz && tc->tc_quality >= 0) { tc->tc_quality = -2000; if (bootverbose) { printf("Timecounter \"%s\" frequency %ju Hz", tc->tc_name, (uintmax_t)tc->tc_frequency); printf(" -- Insufficient hz, needs at least %u\n", u); } } else if (tc->tc_quality >= 0 || bootverbose) { printf("Timecounter \"%s\" frequency %ju Hz quality %d\n", tc->tc_name, (uintmax_t)tc->tc_frequency, tc->tc_quality); } tc->tc_next = timecounters; timecounters = tc; /* * Set up sysctl tree for this counter. */ tc_root = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_kern_timecounter_tc), OID_AUTO, tc->tc_name, CTLFLAG_RW, 0, "timecounter description"); SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO, "mask", CTLFLAG_RD, &(tc->tc_counter_mask), 0, "mask for implemented bits"); SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO, "counter", CTLTYPE_UINT | CTLFLAG_RD, tc, sizeof(*tc), sysctl_kern_timecounter_get, "IU", "current timecounter value"); SYSCTL_ADD_PROC(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO, "frequency", CTLTYPE_U64 | CTLFLAG_RD, tc, sizeof(*tc), sysctl_kern_timecounter_freq, "QU", "timecounter frequency"); SYSCTL_ADD_INT(NULL, SYSCTL_CHILDREN(tc_root), OID_AUTO, "quality", CTLFLAG_RD, &(tc->tc_quality), 0, "goodness of time counter"); /* * Do not automatically switch if the current tc was specifically * chosen. Never automatically use a timecounter with negative quality. * Even though we run on the dummy counter, switching here may be * worse since this timecounter may not be monotonic. */ if (tc_chosen) return; if (tc->tc_quality < 0) return; if (tc->tc_quality < timecounter->tc_quality) return; if (tc->tc_quality == timecounter->tc_quality && tc->tc_frequency < timecounter->tc_frequency) return; (void)tc->tc_get_timecount(tc); (void)tc->tc_get_timecount(tc); timecounter = tc; } /* Report the frequency of the current timecounter. */ uint64_t tc_getfrequency(void) { return (timehands->th_counter->tc_frequency); } static struct mtx tc_setclock_mtx; MTX_SYSINIT(tc_setclock_init, &tc_setclock_mtx, "tcsetc", MTX_SPIN); /* * Step our concept of UTC. This is done by modifying our estimate of * when we booted. */ void tc_setclock(struct timespec *ts) { struct timespec tbef, taft; struct bintime bt, bt2; timespec2bintime(ts, &bt); nanotime(&tbef); mtx_lock_spin(&tc_setclock_mtx); cpu_tick_calibrate(1); binuptime(&bt2); bintime_sub(&bt, &bt2); /* XXX fiddle all the little crinkly bits around the fiords... */ tc_windup(&bt); mtx_unlock_spin(&tc_setclock_mtx); if (timestepwarnings) { nanotime(&taft); log(LOG_INFO, "Time stepped from %jd.%09ld to %jd.%09ld (%jd.%09ld)\n", (intmax_t)tbef.tv_sec, tbef.tv_nsec, (intmax_t)taft.tv_sec, taft.tv_nsec, (intmax_t)ts->tv_sec, ts->tv_nsec); } } /* * Initialize the next struct timehands in the ring and make * it the active timehands. Along the way we might switch to a different * timecounter and/or do seconds processing in NTP. Slightly magic. */ static void tc_windup(struct bintime *new_boottimebin) { struct bintime bt; struct timehands *th, *tho; uint64_t scale; u_int delta, ncount, ogen; int i; time_t t; /* * Make the next timehands a copy of the current one, but do * not overwrite the generation or next pointer. While we * update the contents, the generation must be zero. 
We need * to ensure that the zero generation is visible before the * data updates become visible, which requires release fence. * For similar reasons, re-reading of the generation after the * data is read should use acquire fence. */ tho = timehands; th = tho->th_next; ogen = th->th_generation; th->th_generation = 0; atomic_thread_fence_rel(); bcopy(tho, th, offsetof(struct timehands, th_generation)); if (new_boottimebin != NULL) th->th_boottime = *new_boottimebin; /* * Capture a timecounter delta on the current timecounter and if * changing timecounters, a counter value from the new timecounter. * Update the offset fields accordingly. */ delta = tc_delta(th); if (th->th_counter != timecounter) ncount = timecounter->tc_get_timecount(timecounter); else ncount = 0; #ifdef FFCLOCK ffclock_windup(delta); #endif th->th_offset_count += delta; th->th_offset_count &= th->th_counter->tc_counter_mask; while (delta > th->th_counter->tc_frequency) { /* Eat complete unadjusted seconds. */ delta -= th->th_counter->tc_frequency; th->th_offset.sec++; } if ((delta > th->th_counter->tc_frequency / 2) && (th->th_scale * delta < ((uint64_t)1 << 63))) { /* The product th_scale * delta just barely overflows. */ th->th_offset.sec++; } bintime_addx(&th->th_offset, th->th_scale * delta); /* * Hardware latching timecounters may not generate interrupts on * PPS events, so instead we poll them. There is a finite risk that * the hardware might capture a count which is later than the one we * got above, and therefore possibly in the next NTP second which might * have a different rate than the current NTP second. It doesn't * matter in practice. */ if (tho->th_counter->tc_poll_pps) tho->th_counter->tc_poll_pps(tho->th_counter); /* * Deal with NTP second processing. The for loop normally * iterates at most once, but in extreme situations it might * keep NTP sane if timeouts are not run for several seconds. * At boot, the time step can be large when the TOD hardware * has been read, so on really large steps, we call * ntp_update_second only twice. We need to call it twice in * case we missed a leap second. */ bt = th->th_offset; bintime_add(&bt, &th->th_boottime); i = bt.sec - tho->th_microtime.tv_sec; if (i > LARGE_STEP) i = 2; for (; i > 0; i--) { t = bt.sec; ntp_update_second(&th->th_adjustment, &bt.sec); if (bt.sec != t) th->th_boottime.sec += bt.sec - t; } th->th_bintime = th->th_offset; bintime_add(&th->th_bintime, &th->th_boottime); /* Update the UTC timestamps used by the get*() functions. */ /* XXX shouldn't do this here. Should force non-`get' versions. */ bintime2timeval(&bt, &th->th_microtime); bintime2timespec(&bt, &th->th_nanotime); /* Now is a good time to change timecounters. */ if (th->th_counter != timecounter) { #ifndef __arm__ if ((timecounter->tc_flags & TC_FLAGS_C2STOP) != 0) cpu_disable_c2_sleep++; if ((th->th_counter->tc_flags & TC_FLAGS_C2STOP) != 0) cpu_disable_c2_sleep--; #endif th->th_counter = timecounter; th->th_offset_count = ncount; tc_min_ticktock_freq = max(1, timecounter->tc_frequency / (((uint64_t)timecounter->tc_counter_mask + 1) / 3)); #ifdef FFCLOCK ffclock_change_tc(th); #endif } /*- * Recalculate the scaling factor. We want the number of 1/2^64 * fractions of a second per period of the hardware counter, taking * into account the th_adjustment factor which the NTP PLL/adjtime(2) * processing provides us with. 
* * The th_adjustment is nanoseconds per second with 32 bit binary * fraction and we want 64 bit binary fraction of second: * * x = a * 2^32 / 10^9 = a * 4.294967296 * * The range of th_adjustment is +/- 5000PPM so inside a 64bit int * we can only multiply by about 850 without overflowing, that * leaves no suitably precise fractions for multiply before divide. * * Divide before multiply with a fraction of 2199/512 results in a * systematic undercompensation of 10PPM of th_adjustment. On a * 5000PPM adjustment this is a 0.05PPM error. This is acceptable. * * We happily sacrifice the lowest of the 64 bits of our result * to the goddess of code clarity. * */ scale = (uint64_t)1 << 63; scale += (th->th_adjustment / 1024) * 2199; scale /= th->th_counter->tc_frequency; th->th_scale = scale * 2; /* * Now that the struct timehands is again consistent, set the new * generation number, making sure to not make it zero. */ if (++ogen == 0) ogen = 1; atomic_store_rel_int(&th->th_generation, ogen); /* Go live with the new struct timehands. */ #ifdef FFCLOCK switch (sysclock_active) { case SYSCLOCK_FBCK: #endif time_second = th->th_microtime.tv_sec; time_uptime = th->th_offset.sec; #ifdef FFCLOCK break; case SYSCLOCK_FFWD: time_second = fftimehands->tick_time_lerp.sec; time_uptime = fftimehands->tick_time_lerp.sec - ffclock_boottime.sec; break; } #endif timehands = th; timekeep_push_vdso(); } /* Report or change the active timecounter hardware. */ static int sysctl_kern_timecounter_hardware(SYSCTL_HANDLER_ARGS) { char newname[32]; struct timecounter *newtc, *tc; int error; tc = timecounter; strlcpy(newname, tc->tc_name, sizeof(newname)); error = sysctl_handle_string(oidp, &newname[0], sizeof(newname), req); if (error != 0 || req->newptr == NULL) return (error); /* Record that the tc in use now was specifically chosen. */ tc_chosen = 1; if (strcmp(newname, tc->tc_name) == 0) return (0); for (newtc = timecounters; newtc != NULL; newtc = newtc->tc_next) { if (strcmp(newname, newtc->tc_name) != 0) continue; /* Warm up new timecounter. */ (void)newtc->tc_get_timecount(newtc); (void)newtc->tc_get_timecount(newtc); timecounter = newtc; /* * The vdso timehands update is deferred until the next * 'tc_windup()'. * * This is prudent given that 'timekeep_push_vdso()' does not * use any locking and that it can be called in hard interrupt * context via 'tc_windup()'. */ return (0); } return (EINVAL); } SYSCTL_PROC(_kern_timecounter, OID_AUTO, hardware, CTLTYPE_STRING | CTLFLAG_RW, 0, 0, sysctl_kern_timecounter_hardware, "A", "Timecounter hardware selected"); /* Report the available timecounter hardware. */ static int sysctl_kern_timecounter_choice(SYSCTL_HANDLER_ARGS) { struct sbuf sb; struct timecounter *tc; int error; sbuf_new_for_sysctl(&sb, NULL, 0, req); for (tc = timecounters; tc != NULL; tc = tc->tc_next) { if (tc != timecounters) sbuf_putc(&sb, ' '); sbuf_printf(&sb, "%s(%d)", tc->tc_name, tc->tc_quality); } error = sbuf_finish(&sb); sbuf_delete(&sb); return (error); } SYSCTL_PROC(_kern_timecounter, OID_AUTO, choice, CTLTYPE_STRING | CTLFLAG_RD, 0, 0, sysctl_kern_timecounter_choice, "A", "Timecounter hardware detected"); /* * RFC 2783 PPS-API implementation. */ /* * Return true if the driver is aware of the abi version extensions in the * pps_state structure, and it supports at least the given abi version number. 
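 *
 * Driver-side sketch of opting in (editorial illustration; "sc" is a
 * hypothetical softc, not part of the original source). A driver
 * compiled against the extended pps_state sets driver_abi before
 * calling pps_init_abi(), which is what makes abi_aware() true:
 *
 *	sc->sc_pps.ppscap = PPS_CAPTUREASSERT | PPS_CAPTURECLEAR;
 *	sc->sc_pps.driver_abi = PPS_ABI_VERSION;
 *	sc->sc_pps.driver_mtx = &sc->sc_mtx;	// used by pps_fetch()
 *	pps_init_abi(&sc->sc_pps);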
*/ static inline int abi_aware(struct pps_state *pps, int vers) { return ((pps->kcmode & KCMODE_ABIFLAG) && pps->driver_abi >= vers); } static int pps_fetch(struct pps_fetch_args *fapi, struct pps_state *pps) { int err, timo; pps_seq_t aseq, cseq; struct timeval tv; if (fapi->tsformat && fapi->tsformat != PPS_TSFMT_TSPEC) return (EINVAL); /* * If no timeout is requested, immediately return whatever values were * most recently captured. If timeout seconds is -1, that's a request * to block without a timeout. WITNESS won't let us sleep forever * without a lock (we really don't need a lock), so just repeatedly * sleep a long time. */ if (fapi->timeout.tv_sec || fapi->timeout.tv_nsec) { if (fapi->timeout.tv_sec == -1) timo = 0x7fffffff; else { tv.tv_sec = fapi->timeout.tv_sec; tv.tv_usec = fapi->timeout.tv_nsec / 1000; timo = tvtohz(&tv); } aseq = pps->ppsinfo.assert_sequence; cseq = pps->ppsinfo.clear_sequence; while (aseq == pps->ppsinfo.assert_sequence && cseq == pps->ppsinfo.clear_sequence) { if (abi_aware(pps, 1) && pps->driver_mtx != NULL) { if (pps->flags & PPSFLAG_MTX_SPIN) { err = msleep_spin(pps, pps->driver_mtx, "ppsfch", timo); } else { err = msleep(pps, pps->driver_mtx, PCATCH, "ppsfch", timo); } } else { err = tsleep(pps, PCATCH, "ppsfch", timo); } if (err == EWOULDBLOCK) { if (fapi->timeout.tv_sec == -1) { continue; } else { return (ETIMEDOUT); } } else if (err != 0) { return (err); } } } pps->ppsinfo.current_mode = pps->ppsparam.mode; fapi->pps_info_buf = pps->ppsinfo; return (0); } int pps_ioctl(u_long cmd, caddr_t data, struct pps_state *pps) { pps_params_t *app; struct pps_fetch_args *fapi; #ifdef FFCLOCK struct pps_fetch_ffc_args *fapi_ffc; #endif #ifdef PPS_SYNC struct pps_kcbind_args *kapi; #endif KASSERT(pps != NULL, ("NULL pps pointer in pps_ioctl")); switch (cmd) { case PPS_IOC_CREATE: return (0); case PPS_IOC_DESTROY: return (0); case PPS_IOC_SETPARAMS: app = (pps_params_t *)data; if (app->mode & ~pps->ppscap) return (EINVAL); #ifdef FFCLOCK /* Ensure only a single clock is selected for ffc timestamp. */ if ((app->mode & PPS_TSCLK_MASK) == PPS_TSCLK_MASK) return (EINVAL); #endif pps->ppsparam = *app; return (0); case PPS_IOC_GETPARAMS: app = (pps_params_t *)data; *app = pps->ppsparam; app->api_version = PPS_API_VERS_1; return (0); case PPS_IOC_GETCAP: *(int*)data = pps->ppscap; return (0); case PPS_IOC_FETCH: fapi = (struct pps_fetch_args *)data; return (pps_fetch(fapi, pps)); #ifdef FFCLOCK case PPS_IOC_FETCH_FFCOUNTER: fapi_ffc = (struct pps_fetch_ffc_args *)data; if (fapi_ffc->tsformat && fapi_ffc->tsformat != PPS_TSFMT_TSPEC) return (EINVAL); if (fapi_ffc->timeout.tv_sec || fapi_ffc->timeout.tv_nsec) return (EOPNOTSUPP); pps->ppsinfo_ffc.current_mode = pps->ppsparam.mode; fapi_ffc->pps_info_buf_ffc = pps->ppsinfo_ffc; /* Overwrite timestamps if feedback clock selected. 
*/ switch (pps->ppsparam.mode & PPS_TSCLK_MASK) { case PPS_TSCLK_FBCK: fapi_ffc->pps_info_buf_ffc.assert_timestamp = pps->ppsinfo.assert_timestamp; fapi_ffc->pps_info_buf_ffc.clear_timestamp = pps->ppsinfo.clear_timestamp; break; case PPS_TSCLK_FFWD: break; default: break; } return (0); #endif /* FFCLOCK */ case PPS_IOC_KCBIND: #ifdef PPS_SYNC kapi = (struct pps_kcbind_args *)data; /* XXX Only root should be able to do this */ if (kapi->tsformat && kapi->tsformat != PPS_TSFMT_TSPEC) return (EINVAL); if (kapi->kernel_consumer != PPS_KC_HARDPPS) return (EINVAL); if (kapi->edge & ~pps->ppscap) return (EINVAL); pps->kcmode = (kapi->edge & KCMODE_EDGEMASK) | (pps->kcmode & KCMODE_ABIFLAG); return (0); #else return (EOPNOTSUPP); #endif default: return (ENOIOCTL); } } void pps_init(struct pps_state *pps) { pps->ppscap |= PPS_TSFMT_TSPEC | PPS_CANWAIT; if (pps->ppscap & PPS_CAPTUREASSERT) pps->ppscap |= PPS_OFFSETASSERT; if (pps->ppscap & PPS_CAPTURECLEAR) pps->ppscap |= PPS_OFFSETCLEAR; #ifdef FFCLOCK pps->ppscap |= PPS_TSCLK_MASK; #endif pps->kcmode &= ~KCMODE_ABIFLAG; } void pps_init_abi(struct pps_state *pps) { pps_init(pps); if (pps->driver_abi > 0) { pps->kcmode |= KCMODE_ABIFLAG; pps->kernel_abi = PPS_ABI_VERSION; } } void pps_capture(struct pps_state *pps) { struct timehands *th; KASSERT(pps != NULL, ("NULL pps pointer in pps_capture")); th = timehands; pps->capgen = atomic_load_acq_int(&th->th_generation); pps->capth = th; #ifdef FFCLOCK pps->capffth = fftimehands; #endif pps->capcount = th->th_counter->tc_get_timecount(th->th_counter); atomic_thread_fence_acq(); if (pps->capgen != th->th_generation) pps->capgen = 0; } void pps_event(struct pps_state *pps, int event) { struct bintime bt; struct timespec ts, *tsp, *osp; u_int tcount, *pcount; int foff; pps_seq_t *pseq; #ifdef FFCLOCK struct timespec *tsp_ffc; pps_seq_t *pseq_ffc; ffcounter *ffcount; #endif #ifdef PPS_SYNC int fhard; #endif KASSERT(pps != NULL, ("NULL pps pointer in pps_event")); /* Nothing to do if not currently set to capture this event type. */ if ((event & pps->ppsparam.mode) == 0) return; /* If the timecounter was wound up underneath us, bail out. */ if (pps->capgen == 0 || pps->capgen != atomic_load_acq_int(&pps->capth->th_generation)) return; /* Things would be easier with arrays. */ if (event == PPS_CAPTUREASSERT) { tsp = &pps->ppsinfo.assert_timestamp; osp = &pps->ppsparam.assert_offset; foff = pps->ppsparam.mode & PPS_OFFSETASSERT; #ifdef PPS_SYNC fhard = pps->kcmode & PPS_CAPTUREASSERT; #endif pcount = &pps->ppscount[0]; pseq = &pps->ppsinfo.assert_sequence; #ifdef FFCLOCK ffcount = &pps->ppsinfo_ffc.assert_ffcount; tsp_ffc = &pps->ppsinfo_ffc.assert_timestamp; pseq_ffc = &pps->ppsinfo_ffc.assert_sequence; #endif } else { tsp = &pps->ppsinfo.clear_timestamp; osp = &pps->ppsparam.clear_offset; foff = pps->ppsparam.mode & PPS_OFFSETCLEAR; #ifdef PPS_SYNC fhard = pps->kcmode & PPS_CAPTURECLEAR; #endif pcount = &pps->ppscount[1]; pseq = &pps->ppsinfo.clear_sequence; #ifdef FFCLOCK ffcount = &pps->ppsinfo_ffc.clear_ffcount; tsp_ffc = &pps->ppsinfo_ffc.clear_timestamp; pseq_ffc = &pps->ppsinfo_ffc.clear_sequence; #endif } /* * If the timecounter changed, we cannot compare the count values, so * we have to drop the rest of the PPS-stuff until the next event. */ if (pps->ppstc != pps->capth->th_counter) { pps->ppstc = pps->capth->th_counter; *pcount = pps->capcount; pps->ppscount[2] = pps->capcount; return; } /* Convert the count to a timespec. 
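 *
 * Worked example of the conversion below (editorial illustration, not
 * part of the original source): with a 1MHz counter, th_scale is
 * roughly 2^64 / 1e6, so a masked delta of tcount = 3 counts adds
 *
 *	th_scale * 3 ~= 3 * 2^64 / 1e6
 *
 * to bt.frac, i.e. 3us expressed in 1/2^64 fractions of a second on
 * top of the absolute time of the last windup (th_bintime).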
*/ tcount = pps->capcount - pps->capth->th_offset_count; tcount &= pps->capth->th_counter->tc_counter_mask;
bt = pps->capth->th_bintime; bintime_addx(&bt, pps->capth->th_scale * tcount); bintime2timespec(&bt, &ts);
/* If the timecounter was wound up underneath us, bail out. */
atomic_thread_fence_acq(); if (pps->capgen != pps->capth->th_generation) return;
*pcount = pps->capcount; (*pseq)++; *tsp = ts;
if (foff) { timespecadd(tsp, osp); if (tsp->tv_nsec < 0) { tsp->tv_nsec += 1000000000; tsp->tv_sec -= 1; } }
#ifdef FFCLOCK
*ffcount = pps->capffth->tick_ffcount + tcount; bt = pps->capffth->tick_time; ffclock_convert_delta(tcount, pps->capffth->cest.period, &bt); bintime_add(&bt, &pps->capffth->tick_time); bintime2timespec(&bt, &ts); (*pseq_ffc)++; *tsp_ffc = ts;
#endif
#ifdef PPS_SYNC
if (fhard) { uint64_t scale;
/* * Feed the NTP PLL/FLL. * The FLL wants to know how many (hardware) nanoseconds * elapsed since the previous event. */
tcount = pps->capcount - pps->ppscount[2]; pps->ppscount[2] = pps->capcount; tcount &= pps->capth->th_counter->tc_counter_mask;
scale = (uint64_t)1 << 63; scale /= pps->capth->th_counter->tc_frequency; scale *= 2;
bt.sec = 0; bt.frac = 0; bintime_addx(&bt, scale * tcount); bintime2timespec(&bt, &ts);
hardpps(tsp, ts.tv_nsec + 1000000000 * ts.tv_sec); }
#endif
/* Wake up anyone sleeping in pps_fetch(). */
wakeup(pps); }
/* * Timecounters need to be updated every so often to prevent the hardware * counter from overflowing. Updating also recalculates the cached values * used by the get*() family of functions, so their precision depends on * the update frequency. */
static int tc_tick;
SYSCTL_INT(_kern_timecounter, OID_AUTO, tick, CTLFLAG_RD, &tc_tick, 0, "Approximate number of hardclock ticks in a millisecond");
void tc_ticktock(int cnt) { static int count;
if (mtx_trylock_spin(&tc_setclock_mtx)) { count += cnt; if (count >= tc_tick) { count = 0; tc_windup(NULL); } mtx_unlock_spin(&tc_setclock_mtx); } }
static void __inline tc_adjprecision(void) { int t;
if (tc_timepercentage > 0) { t = (99 + tc_timepercentage) / tc_timepercentage; tc_precexp = fls(t + (t >> 1)) - 1; FREQ2BT(hz / tc_tick, &bt_timethreshold); FREQ2BT(hz, &bt_tickthreshold); bintime_shift(&bt_timethreshold, tc_precexp); bintime_shift(&bt_tickthreshold, tc_precexp); } else { tc_precexp = 31; bt_timethreshold.sec = INT_MAX; bt_timethreshold.frac = ~(uint64_t)0; bt_tickthreshold = bt_timethreshold; }
sbt_timethreshold = bttosbt(bt_timethreshold); sbt_tickthreshold = bttosbt(bt_tickthreshold); }
static int sysctl_kern_timecounter_adjprecision(SYSCTL_HANDLER_ARGS) { int error, val;
val = tc_timepercentage; error = sysctl_handle_int(oidp, &val, 0, req); if (error != 0 || req->newptr == NULL) return (error); tc_timepercentage = val; if (cold) goto done; tc_adjprecision();
done: return (0); }
static void inittimecounter(void *dummy) { u_int p; int tick_rate;
/* * Set the initial timeout to * max(1, <approximate number of hardclock ticks in a millisecond>). * People should probably not use the sysctl to set the timeout * to smaller than its initial value, since that value is the * smallest reasonable one. If they want better timestamps they * should use the non-"get"* functions.
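 *
 * For example (editorial illustration of the sizing below, not part
 * of the original source): with hz = 1000 the rounding gives
 * tc_tick = (1000 + 500) / 1000 = 1, so tc_windup() runs on every
 * hardclock tick; with hz = 4000 it gives tc_tick = 4, one update per
 * four ticks, which is still roughly once per millisecond.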
*/ if (hz > 1000) tc_tick = (hz + 500) / 1000; else tc_tick = 1;
tc_adjprecision(); FREQ2BT(hz, &tick_bt); tick_sbt = bttosbt(tick_bt); tick_rate = hz / tc_tick; FREQ2BT(tick_rate, &tc_tick_bt); tc_tick_sbt = bttosbt(tc_tick_bt);
p = (tc_tick * 1000000) / hz; printf("Timecounters tick every %d.%03u msec\n", p / 1000, p % 1000);
#ifdef FFCLOCK
ffclock_init();
#endif
/* warm up new timecounter (again) and get rolling. */
(void)timecounter->tc_get_timecount(timecounter); (void)timecounter->tc_get_timecount(timecounter);
mtx_lock_spin(&tc_setclock_mtx); tc_windup(NULL); mtx_unlock_spin(&tc_setclock_mtx); }
SYSINIT(timecounter, SI_SUB_CLOCKS, SI_ORDER_SECOND, inittimecounter, NULL);
/* Cpu tick handling -------------------------------------------------*/
static int cpu_tick_variable; static uint64_t cpu_tick_frequency;
static DPCPU_DEFINE(uint64_t, tc_cpu_ticks_base); static DPCPU_DEFINE(unsigned, tc_cpu_ticks_last);
static uint64_t tc_cpu_ticks(void) { struct timecounter *tc; uint64_t res, *base; unsigned u, *last;
critical_enter(); base = DPCPU_PTR(tc_cpu_ticks_base); last = DPCPU_PTR(tc_cpu_ticks_last); tc = timehands->th_counter; u = tc->tc_get_timecount(tc) & tc->tc_counter_mask; if (u < *last) *base += (uint64_t)tc->tc_counter_mask + 1; *last = u; res = u + *base; critical_exit(); return (res); }
void cpu_tick_calibration(void) { static time_t last_calib;
if (time_uptime != last_calib && !(time_uptime & 0xf)) { cpu_tick_calibrate(0); last_calib = time_uptime; } }
/* * This function gets called every 16 seconds on only one designated * CPU in the system from hardclock() via cpu_tick_calibration(). * * Whenever the real time clock is stepped we get called with reset=1 * to make sure we handle suspend/resume and similar events correctly. */
static void cpu_tick_calibrate(int reset) { static uint64_t c_last; uint64_t c_this, c_delta; static struct bintime t_last; struct bintime t_this, t_delta; uint32_t divi;
if (reset) {
/* The clock was stepped, abort & reset */
t_last.sec = 0; return; }
/* we don't calibrate fixed rate cputicks */
if (!cpu_tick_variable) return;
getbinuptime(&t_this); c_this = cpu_ticks();
if (t_last.sec != 0) { c_delta = c_this - c_last; t_delta = t_this; bintime_sub(&t_delta, &t_last);
/* * Headroom: * 2^(64-20) / 16[s] = * 2^(44) / 16[s] = * 17.592.186.044.416 / 16 = * 1.099.511.627.776 [Hz] */
divi = t_delta.sec << 20; divi |= t_delta.frac >> (64 - 20); c_delta <<= 20; c_delta /= divi;
if (c_delta > cpu_tick_frequency) { if (0 && bootverbose) printf("cpu_tick increased to %ju Hz\n", c_delta); cpu_tick_frequency = c_delta; } }
c_last = c_this; t_last = t_this; }
void set_cputicker(cpu_tick_f *func, uint64_t freq, unsigned var) {
if (func == NULL) { cpu_ticks = tc_cpu_ticks; } else { cpu_tick_frequency = freq; cpu_tick_variable = var; cpu_ticks = func; } }
uint64_t cpu_tickrate(void) {
if (cpu_ticks == tc_cpu_ticks) return (tc_getfrequency()); return (cpu_tick_frequency); }
/* * We need to be slightly careful converting cputicks to microseconds. * There is plenty of margin in 64 bits of microseconds (half a million * years) and in 64 bits at 4 GHz (146 years), but if we do a multiply * before divide conversion (to retain precision) we find that the * margin shrinks to 1.5 hours (one millionth of 146y). * With a three prong approach we never lose significant bits, no * matter what the cputick rate and length of the time interval are.
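 *
 * The thresholds below fall out of 64-bit overflow arithmetic
 * (editorial illustration, not part of the original source):
 * tick * 1000000 overflows once tick > floor(2^64 / 1e6) =
 * 18446744073709, and tick * 1000 overflows once tick >
 * floor(2^64 / 1e3) = 18446744073709551, hence the three ranges with
 * progressively smaller multipliers and correspondingly coarser
 * divisors.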
*/ uint64_t cputick2usec(uint64_t tick) { if (tick > 18446744073709551LL) /* floor(2^64 / 1000) */ return (tick / (cpu_tickrate() / 1000000LL)); else if (tick > 18446744073709LL) /* floor(2^64 / 1000000) */ return ((tick * 1000LL) / (cpu_tickrate() / 1000LL)); else return ((tick * 1000000LL) / cpu_tickrate()); } cpu_tick_f *cpu_ticks = tc_cpu_ticks; static int vdso_th_enable = 1; static int sysctl_fast_gettime(SYSCTL_HANDLER_ARGS) { int old_vdso_th_enable, error; old_vdso_th_enable = vdso_th_enable; error = sysctl_handle_int(oidp, &old_vdso_th_enable, 0, req); if (error != 0) return (error); vdso_th_enable = old_vdso_th_enable; return (0); } SYSCTL_PROC(_kern_timecounter, OID_AUTO, fast_gettime, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0, sysctl_fast_gettime, "I", "Enable fast time of day"); uint32_t tc_fill_vdso_timehands(struct vdso_timehands *vdso_th) { struct timehands *th; uint32_t enabled; th = timehands; - vdso_th->th_algo = VDSO_TH_ALGO_1; vdso_th->th_scale = th->th_scale; vdso_th->th_offset_count = th->th_offset_count; vdso_th->th_counter_mask = th->th_counter->tc_counter_mask; vdso_th->th_offset = th->th_offset; vdso_th->th_boottime = th->th_boottime; - enabled = cpu_fill_vdso_timehands(vdso_th, th->th_counter); + if (th->th_counter->tc_fill_vdso_timehands != NULL) { + enabled = th->th_counter->tc_fill_vdso_timehands(vdso_th, + th->th_counter); + } else + enabled = 0; if (!vdso_th_enable) enabled = 0; return (enabled); } #ifdef COMPAT_FREEBSD32 uint32_t tc_fill_vdso_timehands32(struct vdso_timehands32 *vdso_th32) { struct timehands *th; uint32_t enabled; th = timehands; - vdso_th32->th_algo = VDSO_TH_ALGO_1; *(uint64_t *)&vdso_th32->th_scale[0] = th->th_scale; vdso_th32->th_offset_count = th->th_offset_count; vdso_th32->th_counter_mask = th->th_counter->tc_counter_mask; vdso_th32->th_offset.sec = th->th_offset.sec; *(uint64_t *)&vdso_th32->th_offset.frac[0] = th->th_offset.frac; vdso_th32->th_boottime.sec = th->th_boottime.sec; *(uint64_t *)&vdso_th32->th_boottime.frac[0] = th->th_boottime.frac; - enabled = cpu_fill_vdso_timehands32(vdso_th32, th->th_counter); + if (th->th_counter->tc_fill_vdso_timehands32 != NULL) { + enabled = th->th_counter->tc_fill_vdso_timehands32(vdso_th32, + th->th_counter); + } else + enabled = 0; if (!vdso_th_enable) enabled = 0; return (enabled); } #endif Index: head/sys/sys/timetc.h =================================================================== --- head/sys/sys/timetc.h (revision 304284) +++ head/sys/sys/timetc.h (revision 304285) @@ -1,89 +1,97 @@ /*- * ---------------------------------------------------------------------------- * "THE BEER-WARE LICENSE" (Revision 42): * wrote this file. As long as you retain this notice you * can do whatever you want with this stuff. If we meet some day, and you think * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp * ---------------------------------------------------------------------------- * * $FreeBSD$ */ #ifndef _SYS_TIMETC_H_ #define _SYS_TIMETC_H_ #ifndef _KERNEL #error "no user-serviceable parts inside" #endif /*- * `struct timecounter' is the interface between the hardware which implements * a timecounter and the MI code which uses this to keep track of time. * * A timecounter is a binary counter which has two properties: * * it runs at a fixed, known frequency. * * it has sufficient bits to not roll over in less than approximately * max(2 msec, 2/HZ seconds). (The value 2 here is really 1 + delta, * for some indeterminate value of delta.) 
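 *
 * For example (editorial illustration, not part of the original
 * source): a 32-bit counter running at 4GHz wraps every
 * 2^32 / 4e9 ~= 1.07s, comfortably above the 2ms bound, while a
 * 16-bit counter at 100MHz wraps every 2^16 / 1e8 ~= 655us and would
 * not qualify without widening the counter or dividing it down.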
*/ struct timecounter; +struct vdso_timehands; +struct vdso_timehands32; typedef u_int timecounter_get_t(struct timecounter *); typedef void timecounter_pps_t(struct timecounter *); +typedef uint32_t timecounter_fill_vdso_timehands_t(struct vdso_timehands *, + struct timecounter *); +typedef uint32_t timecounter_fill_vdso_timehands32_t(struct vdso_timehands32 *, + struct timecounter *);
struct timecounter { timecounter_get_t *tc_get_timecount;
/* * This function reads the counter. It is not required to * mask any unimplemented bits out, as long as they are * constant. */
timecounter_pps_t *tc_poll_pps;
/* * This function is optional. It will be called whenever the * timecounter is rewound, and is intended to check for PPS * events. Normal hardware does not need it but timecounters * which latch PPS in hardware (like sys/pci/xrpu.c) do. */
u_int tc_counter_mask;
/* This mask should mask off any unimplemented bits. */
uint64_t tc_frequency;
/* Frequency of the counter in Hz. */
const char *tc_name;
/* Name of the timecounter. */
int tc_quality;
/* * Used to determine if this timecounter is better than * another timecounter; higher means better. Negative * means "only use at explicit request". */
u_int tc_flags;
#define TC_FLAGS_C2STOP 1 /* Timer dies in C2+. */
#define TC_FLAGS_SUSPEND_SAFE 2 /* * Timer functional across * suspend/resume. */
void *tc_priv;
/* Pointer to the timecounter's private parts. */
struct timecounter *tc_next;
/* Pointer to the next timecounter. */
+ timecounter_fill_vdso_timehands_t *tc_fill_vdso_timehands; + timecounter_fill_vdso_timehands32_t *tc_fill_vdso_timehands32; };
extern struct timecounter *timecounter; extern int tc_min_ticktock_freq;
/* * Minimal tc_ticktock() call frequency, * required to handle counter wraps. */
u_int64_t tc_getfrequency(void); void tc_init(struct timecounter *tc); void tc_setclock(struct timespec *ts); void tc_ticktock(int cnt); void cpu_tick_calibration(void);
#ifdef SYSCTL_DECL
SYSCTL_DECL(_kern_timecounter);
#endif
#endif /* !_SYS_TIMETC_H_ */
Index: head/sys/sys/vdso.h =================================================================== --- head/sys/sys/vdso.h (revision 304284) +++ head/sys/sys/vdso.h (revision 304285) @@ -1,131 +1,132 @@ /*- * Copyright 2012 Konstantin Belousov . * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* * $FreeBSD$ */
#ifndef _SYS_VDSO_H
#define _SYS_VDSO_H
#include #include
struct vdso_timehands { uint32_t th_algo; uint32_t th_gen; uint64_t th_scale; uint32_t th_offset_count; uint32_t th_counter_mask; struct bintime th_offset; struct bintime th_boottime; VDSO_TIMEHANDS_MD };
struct vdso_timekeep { uint32_t tk_ver; uint32_t tk_enabled; uint32_t tk_current; struct vdso_timehands tk_th[]; };
#define VDSO_TK_CURRENT_BUSY 0xffffffff
#define VDSO_TK_VER_1 0x1
#define VDSO_TK_VER_CURR VDSO_TK_VER_1
#define VDSO_TH_ALGO_1 0x1
+#define VDSO_TH_ALGO_2 0x2
#ifndef _KERNEL
struct timespec; struct timeval; struct timezone;
int __vdso_clock_gettime(clockid_t clock_id, struct timespec *ts); int __vdso_gettimeofday(struct timeval *tv, struct timezone *tz); -u_int __vdso_gettc(const struct vdso_timehands *vdso_th); +int __vdso_gettc(const struct vdso_timehands *vdso_th, u_int *tc); int __vdso_gettimekeep(struct vdso_timekeep **tk);
#endif
#ifdef _KERNEL
struct timecounter;
struct vdso_sv_tk { int sv_timekeep_off; int sv_timekeep_curr; uint32_t sv_timekeep_gen; };
void timekeep_push_vdso(void); uint32_t tc_fill_vdso_timehands(struct vdso_timehands *vdso_th);
/* * The cpu_fill_vdso_timehands() function should fill the MD part of * the struct vdso_timehands, which is both machine- and * timecounter-dependent. The return value should be 1 if the fast * userspace timecounter is enabled by hardware, and 0 otherwise. The * global sysctl enable override is handled by machine-independent * code after the cpu_fill_vdso_timehands() call is made. */
uint32_t cpu_fill_vdso_timehands(struct vdso_timehands *vdso_th, struct timecounter *tc); struct vdso_sv_tk *alloc_sv_tk(void);
#define VDSO_TH_NUM 4
#ifdef COMPAT_FREEBSD32
struct bintime32 { uint32_t sec; uint32_t frac[2]; };
struct vdso_timehands32 { uint32_t th_algo; uint32_t th_gen; uint32_t th_scale[2]; uint32_t th_offset_count; uint32_t th_counter_mask; struct bintime32 th_offset; struct bintime32 th_boottime; VDSO_TIMEHANDS_MD32 };
struct vdso_timekeep32 { uint32_t tk_ver; uint32_t tk_enabled; uint32_t tk_current; struct vdso_timehands32 tk_th[]; };
uint32_t tc_fill_vdso_timehands32(struct vdso_timehands32 *vdso_th32); uint32_t cpu_fill_vdso_timehands32(struct vdso_timehands32 *vdso_th32, struct timecounter *tc); struct vdso_sv_tk *alloc_sv_tk_compat32(void);
#endif
#endif
#endif
Index: head/sys/x86/include/vdso.h =================================================================== --- head/sys/x86/include/vdso.h (revision 304284) +++ head/sys/x86/include/vdso.h (revision 304285) @@ -1,42 +1,50 @@ /*- * Copyright 2012 Konstantin Belousov . + * Copyright 2016 The FreeBSD Foundation. * All rights reserved. * + * Portions of this software were developed by Konstantin Belousov + * under sponsorship from the FreeBSD Foundation. + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _X86_VDSO_H #define _X86_VDSO_H #define VDSO_TIMEHANDS_MD \ uint32_t th_x86_shift; \ - uint32_t th_res[7]; + uint32_t th_x86_hpet_idx; \ + uint32_t th_res[6]; + +#define VDSO_TH_ALGO_X86_TSC VDSO_TH_ALGO_1 +#define VDSO_TH_ALGO_X86_HPET VDSO_TH_ALGO_2 #ifdef _KERNEL #ifdef COMPAT_FREEBSD32 #define VDSO_TIMEHANDS_MD32 VDSO_TIMEHANDS_MD #endif #endif #endif Index: head/sys/x86/x86/tsc.c =================================================================== --- head/sys/x86/x86/tsc.c (revision 304284) +++ head/sys/x86/x86/tsc.c (revision 304285) @@ -1,746 +1,759 @@ /*- * Copyright (c) 1998-2003 Poul-Henning Kamp * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ #include __FBSDID("$FreeBSD$"); #include "opt_compat.h" #include "opt_clock.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include "cpufreq_if.h" uint64_t tsc_freq; int tsc_is_invariant; int tsc_perf_stat; static eventhandler_tag tsc_levels_tag, tsc_pre_tag, tsc_post_tag; SYSCTL_INT(_kern_timecounter, OID_AUTO, invariant_tsc, CTLFLAG_RDTUN, &tsc_is_invariant, 0, "Indicates whether the TSC is P-state invariant"); #ifdef SMP int smp_tsc; SYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc, CTLFLAG_RDTUN, &smp_tsc, 0, "Indicates whether the TSC is safe to use in SMP mode"); int smp_tsc_adjust = 0; SYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc_adjust, CTLFLAG_RDTUN, &smp_tsc_adjust, 0, "Try to adjust TSC on APs to match BSP"); #endif static int tsc_shift = 1; SYSCTL_INT(_kern_timecounter, OID_AUTO, tsc_shift, CTLFLAG_RDTUN, &tsc_shift, 0, "Shift to pre-apply for the maximum TSC frequency"); static int tsc_disabled; SYSCTL_INT(_machdep, OID_AUTO, disable_tsc, CTLFLAG_RDTUN, &tsc_disabled, 0, "Disable x86 Time Stamp Counter"); static int tsc_skip_calibration; SYSCTL_INT(_machdep, OID_AUTO, disable_tsc_calibration, CTLFLAG_RDTUN, &tsc_skip_calibration, 0, "Disable TSC frequency calibration"); static void tsc_freq_changed(void *arg, const struct cf_level *level, int status); static void tsc_freq_changing(void *arg, const struct cf_level *level, int *status); static unsigned tsc_get_timecount(struct timecounter *tc); static inline unsigned tsc_get_timecount_low(struct timecounter *tc); static unsigned tsc_get_timecount_lfence(struct timecounter *tc); static unsigned tsc_get_timecount_low_lfence(struct timecounter *tc); static unsigned tsc_get_timecount_mfence(struct timecounter *tc); static unsigned tsc_get_timecount_low_mfence(struct timecounter *tc); static void tsc_levels_changed(void *arg, int unit); +static uint32_t x86_tsc_vdso_timehands(struct vdso_timehands *vdso_th, + struct timecounter *tc); +#ifdef COMPAT_FREEBSD32 +static uint32_t x86_tsc_vdso_timehands32(struct vdso_timehands32 *vdso_th32, + struct timecounter *tc); +#endif static struct timecounter tsc_timecounter = { - tsc_get_timecount, /* get_timecount */ - 0, /* no poll_pps */ - ~0u, /* counter_mask */ - 0, /* frequency */ - "TSC", /* name */ - 800, /* quality (adjusted in code) */ + .tc_get_timecount = tsc_get_timecount, + .tc_counter_mask = ~0u, + .tc_name = "TSC", + .tc_quality = 800, /* adjusted in code */ + .tc_fill_vdso_timehands = x86_tsc_vdso_timehands, +#ifdef COMPAT_FREEBSD32 + .tc_fill_vdso_timehands32 = x86_tsc_vdso_timehands32, +#endif }; static void tsc_freq_vmware(void) { u_int regs[4]; if (hv_high >= 0x40000010) { do_cpuid(0x40000010, regs); tsc_freq = regs[0] * 1000; } else { vmware_hvcall(VMW_HVCMD_GETHZ, regs); if (regs[1] != UINT_MAX) tsc_freq = regs[0] | ((uint64_t)regs[1] << 32); } tsc_is_invariant = 1; } static void tsc_freq_intel(void) { char brand[48]; u_int regs[4]; uint64_t freq; char *p; u_int i; /* * Intel Processor Identification and the CPUID Instruction * Application Note 485. 
* http://www.intel.com/assets/pdf/appnote/241618.pdf */ if (cpu_exthigh >= 0x80000004) { p = brand; for (i = 0x80000002; i < 0x80000005; i++) { do_cpuid(i, regs); memcpy(p, regs, sizeof(regs)); p += sizeof(regs); } p = NULL; for (i = 0; i < sizeof(brand) - 1; i++) if (brand[i] == 'H' && brand[i + 1] == 'z') p = brand + i; if (p != NULL) { p -= 5; switch (p[4]) { case 'M': i = 1; break; case 'G': i = 1000; break; case 'T': i = 1000000; break; default: return; } #define C2D(c) ((c) - '0') if (p[1] == '.') { freq = C2D(p[0]) * 1000; freq += C2D(p[2]) * 100; freq += C2D(p[3]) * 10; freq *= i * 1000; } else { freq = C2D(p[0]) * 1000; freq += C2D(p[1]) * 100; freq += C2D(p[2]) * 10; freq += C2D(p[3]); freq *= i * 1000000; } #undef C2D tsc_freq = freq; } } } static void probe_tsc_freq(void) { u_int regs[4]; uint64_t tsc1, tsc2; if (cpu_high >= 6) { do_cpuid(6, regs); if ((regs[2] & CPUID_PERF_STAT) != 0) { /* * XXX Some emulators expose host CPUID without actual * support for these MSRs. We must test whether they * really work. */ wrmsr(MSR_MPERF, 0); wrmsr(MSR_APERF, 0); DELAY(10); if (rdmsr(MSR_MPERF) > 0 && rdmsr(MSR_APERF) > 0) tsc_perf_stat = 1; } } if (vm_guest == VM_GUEST_VMWARE) { tsc_freq_vmware(); return; } switch (cpu_vendor_id) { case CPU_VENDOR_AMD: if ((amd_pminfo & AMDPM_TSC_INVARIANT) != 0 || (vm_guest == VM_GUEST_NO && CPUID_TO_FAMILY(cpu_id) >= 0x10)) tsc_is_invariant = 1; if (cpu_feature & CPUID_SSE2) { tsc_timecounter.tc_get_timecount = tsc_get_timecount_mfence; } break; case CPU_VENDOR_INTEL: if ((amd_pminfo & AMDPM_TSC_INVARIANT) != 0 || (vm_guest == VM_GUEST_NO && ((CPUID_TO_FAMILY(cpu_id) == 0x6 && CPUID_TO_MODEL(cpu_id) >= 0xe) || (CPUID_TO_FAMILY(cpu_id) == 0xf && CPUID_TO_MODEL(cpu_id) >= 0x3)))) tsc_is_invariant = 1; if (cpu_feature & CPUID_SSE2) { tsc_timecounter.tc_get_timecount = tsc_get_timecount_lfence; } break; case CPU_VENDOR_CENTAUR: if (vm_guest == VM_GUEST_NO && CPUID_TO_FAMILY(cpu_id) == 0x6 && CPUID_TO_MODEL(cpu_id) >= 0xf && (rdmsr(0x1203) & 0x100000000ULL) == 0) tsc_is_invariant = 1; if (cpu_feature & CPUID_SSE2) { tsc_timecounter.tc_get_timecount = tsc_get_timecount_lfence; } break; } if (tsc_skip_calibration) { if (cpu_vendor_id == CPU_VENDOR_INTEL) tsc_freq_intel(); return; } if (bootverbose) printf("Calibrating TSC clock ... "); tsc1 = rdtsc(); DELAY(1000000); tsc2 = rdtsc(); tsc_freq = tsc2 - tsc1; if (bootverbose) printf("TSC clock: %ju Hz\n", (intmax_t)tsc_freq); } void init_TSC(void) { if ((cpu_feature & CPUID_TSC) == 0 || tsc_disabled) return; #ifdef __i386__ /* The TSC is known to be broken on certain CPUs. */ switch (cpu_vendor_id) { case CPU_VENDOR_AMD: switch (cpu_id & 0xFF0) { case 0x500: /* K5 Model 0 */ return; } break; case CPU_VENDOR_CENTAUR: switch (cpu_id & 0xff0) { case 0x540: /* * http://www.centtech.com/c6_data_sheet.pdf * * I-12 RDTSC may return incoherent values in EDX:EAX * I-13 RDTSC hangs when certain event counters are used */ return; } break; case CPU_VENDOR_NSC: switch (cpu_id & 0xff0) { case 0x540: if ((cpu_id & CPUID_STEPPING) == 0) return; break; } break; } #endif probe_tsc_freq(); /* * Inform CPU accounting about our boot-time clock rate. This will * be updated if someone loads a cpufreq driver after boot that * discovers a new max frequency. */ if (tsc_freq != 0) set_cputicker(rdtsc, tsc_freq, !tsc_is_invariant); if (tsc_is_invariant) return; /* Register to find out about changes in CPU frequency. 
*/ tsc_pre_tag = EVENTHANDLER_REGISTER(cpufreq_pre_change, tsc_freq_changing, NULL, EVENTHANDLER_PRI_FIRST); tsc_post_tag = EVENTHANDLER_REGISTER(cpufreq_post_change, tsc_freq_changed, NULL, EVENTHANDLER_PRI_FIRST); tsc_levels_tag = EVENTHANDLER_REGISTER(cpufreq_levels_changed, tsc_levels_changed, NULL, EVENTHANDLER_PRI_ANY); }
#ifdef SMP
/* * RDTSC is not a serializing instruction, and does not drain the * instruction stream, so we need to drain the stream before executing * it. It could be fixed by use of RDTSCP, except the instruction is * not available everywhere. * * Use CPUID for draining in the boot-time SMP consistency test. The * timecounters use MFENCE for AMD CPUs, and LFENCE for others (Intel * and VIA) when SSE2 is present, and nothing on older machines which * also do not issue RDTSC prematurely. There, testing for SSE2 and * vendor is too cumbersome, and we learn about TSC presence from CPUID. * * Do not use do_cpuid(), since we do not need CPUID results, which * have to be written into memory with do_cpuid(). */
#define TSC_READ(x) \ static void \ tsc_read_##x(void *arg) \ { \ uint64_t *tsc = arg; \ u_int cpu = PCPU_GET(cpuid); \ \ __asm __volatile("cpuid" : : : "eax", "ebx", "ecx", "edx"); \ tsc[cpu * 3 + x] = rdtsc(); \ }
TSC_READ(0) TSC_READ(1) TSC_READ(2)
#undef TSC_READ
#define N 1000
static void comp_smp_tsc(void *arg) { uint64_t *tsc; int64_t d1, d2; u_int cpu = PCPU_GET(cpuid); u_int i, j, size;
size = (mp_maxid + 1) * 3; for (i = 0, tsc = arg; i < N; i++, tsc += size) CPU_FOREACH(j) { if (j == cpu) continue; d1 = tsc[cpu * 3 + 1] - tsc[j * 3]; d2 = tsc[cpu * 3 + 2] - tsc[j * 3 + 1]; if (d1 <= 0 || d2 <= 0) { smp_tsc = 0; return; } } }
static void adj_smp_tsc(void *arg) { uint64_t *tsc; int64_t d, min, max; u_int cpu = PCPU_GET(cpuid); u_int first, i, size;
first = CPU_FIRST(); if (cpu == first) return;
min = INT64_MIN; max = INT64_MAX; size = (mp_maxid + 1) * 3;
for (i = 0, tsc = arg; i < N; i++, tsc += size) { d = tsc[first * 3] - tsc[cpu * 3 + 1]; if (d > min) min = d; d = tsc[first * 3 + 1] - tsc[cpu * 3 + 2]; if (d > min) min = d; d = tsc[first * 3 + 1] - tsc[cpu * 3]; if (d < max) max = d; d = tsc[first * 3 + 2] - tsc[cpu * 3 + 1]; if (d < max) max = d; }
if (min > max) return;
d = min / 2 + max / 2;
__asm __volatile ( "movl $0x10, %%ecx\n\t" "rdmsr\n\t" "addl %%edi, %%eax\n\t" "adcl %%esi, %%edx\n\t" "wrmsr\n" : /* No output */ : "D" ((uint32_t)d), "S" ((uint32_t)(d >> 32)) : "ax", "cx", "dx", "cc" ); }
static int test_tsc(void) { uint64_t *data, *tsc; u_int i, size, adj;
if ((!smp_tsc && !tsc_is_invariant) || vm_guest) return (-100);
size = (mp_maxid + 1) * 3; data = malloc(sizeof(*data) * size * N, M_TEMP, M_WAITOK); adj = 0;
retry: for (i = 0, tsc = data; i < N; i++, tsc += size) smp_rendezvous(tsc_read_0, tsc_read_1, tsc_read_2, tsc);
smp_tsc = 1; /* XXX */
smp_rendezvous(smp_no_rendevous_barrier, comp_smp_tsc, smp_no_rendevous_barrier, data);
if (!smp_tsc && adj < smp_tsc_adjust) { adj++; smp_rendezvous(smp_no_rendevous_barrier, adj_smp_tsc, smp_no_rendevous_barrier, data); goto retry; }
free(data, M_TEMP);
if (bootverbose) printf("SMP: %sed TSC synchronization test%s\n", smp_tsc ? "pass" : "fail", adj > 0 ? " after adjustment" : "");
if (smp_tsc && tsc_is_invariant) { switch (cpu_vendor_id) { case CPU_VENDOR_AMD:
/* * Starting with Family 15h processors, TSC clock * source is in the north bridge. Check whether * we have a single-socket/multi-core platform. * XXX Need more work for complex cases.
*/ if (CPUID_TO_FAMILY(cpu_id) < 0x15 || (amd_feature2 & AMDID2_CMP) == 0 || smp_cpus > (cpu_procinfo2 & AMDID_CMP_CORES) + 1) break; return (1000);
case CPU_VENDOR_INTEL:
/* * XXX Assume Intel platforms have synchronized TSCs. */
return (1000); }
return (800); }
return (-100); }
#undef N
#else
/* * The function is not called; it is provided to avoid a linking * failure on uniprocessor kernels. */
static int test_tsc(void) {
return (0); }
#endif /* SMP */
static void init_TSC_tc(void) { uint64_t max_freq; int shift;
if ((cpu_feature & CPUID_TSC) == 0 || tsc_disabled) return;
/* * Limit timecounter frequency to fit in an int and prevent it from * overflowing too fast. */
max_freq = UINT_MAX;
/* * We cannot use the TSC if we support APM. Precise timekeeping * on an APM'ed machine is at best a fool's pursuit, since * any and all of the time spent in various SMM code can't * be reliably accounted for. Reading the RTC is your only * source of reliable time info. The i8254 loses too, of course, * but we need to have some kind of time... * We don't know at this point whether APM is going to be used * or not, nor when it might be activated. Play it safe. */
if (power_pm_get_type() == POWER_PM_TYPE_APM) { tsc_timecounter.tc_quality = -1000; if (bootverbose) printf("TSC timecounter disabled: APM enabled.\n"); goto init; }
/* * Intel CPUs without a C-state invariant TSC can stop the TSC * in either C2 or C3. Disable use of C2 and C3 while using * the TSC as the timecounter. The timecounter can be changed * to enable C2 and C3. * * Note that the TSC is used as the cputicker for computing * thread runtime regardless of the timecounter setting, so * using an alternate timecounter and enabling C2 or C3 can * result in incorrect runtimes for kernel idle threads (but not * for any non-idle threads). */
if (cpu_deepest_sleep >= 2 && cpu_vendor_id == CPU_VENDOR_INTEL && (amd_pminfo & AMDPM_TSC_INVARIANT) == 0) { tsc_timecounter.tc_flags |= TC_FLAGS_C2STOP; if (bootverbose) printf("TSC timecounter disables C2 and C3.\n"); }
/* * We cannot use the TSC in SMP mode unless the TSCs on all CPUs * are synchronized. If the user is sure that the system has * synchronized TSCs, set kern.timecounter.smp_tsc tunable to a * non-zero value. The TSC seems unreliable in virtualized SMP * environments, so it is set to a negative quality in those cases. */
if (mp_ncpus > 1) tsc_timecounter.tc_quality = test_tsc(); else if (tsc_is_invariant) tsc_timecounter.tc_quality = 1000;
max_freq >>= tsc_shift;
init: for (shift = 0; shift <= 31 && (tsc_freq >> shift) > max_freq; shift++) ;
if ((cpu_feature & CPUID_SSE2) != 0 && mp_ncpus > 1) { if (cpu_vendor_id == CPU_VENDOR_AMD) { tsc_timecounter.tc_get_timecount = shift > 0 ? tsc_get_timecount_low_mfence : tsc_get_timecount_mfence; } else { tsc_timecounter.tc_get_timecount = shift > 0 ? tsc_get_timecount_low_lfence : tsc_get_timecount_lfence; } } else { tsc_timecounter.tc_get_timecount = shift > 0 ? tsc_get_timecount_low : tsc_get_timecount; }
if (shift > 0) { tsc_timecounter.tc_name = "TSC-low"; if (bootverbose) printf("TSC timecounter discards lower %d bit(s)\n", shift); }
if (tsc_freq != 0) { tsc_timecounter.tc_frequency = tsc_freq >> shift; tsc_timecounter.tc_priv = (void *)(intptr_t)shift; tc_init(&tsc_timecounter); } }
SYSINIT(tsc_tc, SI_SUB_SMP, SI_ORDER_ANY, init_TSC_tc, NULL);
/* * When cpufreq levels change, find out about the (new) max frequency. We * use this to update CPU accounting in case it got a lower estimate at boot.
*/ static void tsc_levels_changed(void *arg, int unit) { device_t cf_dev; struct cf_level *levels; int count, error; uint64_t max_freq; /* Only use values from the first CPU, assuming all are equal. */ if (unit != 0) return; /* Find the appropriate cpufreq device instance. */ cf_dev = devclass_get_device(devclass_find("cpufreq"), unit); if (cf_dev == NULL) { printf("tsc_levels_changed() called but no cpufreq device?\n"); return; } /* Get settings from the device and find the max frequency. */ count = 64; levels = malloc(count * sizeof(*levels), M_TEMP, M_NOWAIT); if (levels == NULL) return; error = CPUFREQ_LEVELS(cf_dev, levels, &count); if (error == 0 && count != 0) { max_freq = (uint64_t)levels[0].total_set.freq * 1000000; set_cputicker(rdtsc, max_freq, 1); } else printf("tsc_levels_changed: no max freq found\n"); free(levels, M_TEMP); } /* * If the TSC timecounter is in use, veto the pending change. It may be * possible in the future to handle a dynamically-changing timecounter rate. */ static void tsc_freq_changing(void *arg, const struct cf_level *level, int *status) { if (*status != 0 || timecounter != &tsc_timecounter) return; printf("timecounter TSC must not be in use when " "changing frequencies; change denied\n"); *status = EBUSY; } /* Update TSC freq with the value indicated by the caller. */ static void tsc_freq_changed(void *arg, const struct cf_level *level, int status) { uint64_t freq; /* If there was an error during the transition, don't do anything. */ if (tsc_disabled || status != 0) return; /* Total setting for this level gives the new frequency in MHz. */ freq = (uint64_t)level->total_set.freq * 1000000; atomic_store_rel_64(&tsc_freq, freq); tsc_timecounter.tc_frequency = freq >> (int)(intptr_t)tsc_timecounter.tc_priv; } static int sysctl_machdep_tsc_freq(SYSCTL_HANDLER_ARGS) { int error; uint64_t freq; freq = atomic_load_acq_64(&tsc_freq); if (freq == 0) return (EOPNOTSUPP); error = sysctl_handle_64(oidp, &freq, 0, req); if (error == 0 && req->newptr != NULL) { atomic_store_rel_64(&tsc_freq, freq); atomic_store_rel_64(&tsc_timecounter.tc_frequency, freq >> (int)(intptr_t)tsc_timecounter.tc_priv); } return (error); } SYSCTL_PROC(_machdep, OID_AUTO, tsc_freq, CTLTYPE_U64 | CTLFLAG_RW, 0, 0, sysctl_machdep_tsc_freq, "QU", "Time Stamp Counter frequency"); static u_int tsc_get_timecount(struct timecounter *tc __unused) { return (rdtsc32()); } static inline u_int tsc_get_timecount_low(struct timecounter *tc) { uint32_t rv; __asm __volatile("rdtsc; shrd %%cl, %%edx, %0" : "=a" (rv) : "c" ((int)(intptr_t)tc->tc_priv) : "edx"); return (rv); } static u_int tsc_get_timecount_lfence(struct timecounter *tc __unused) { lfence(); return (rdtsc32()); } static u_int tsc_get_timecount_low_lfence(struct timecounter *tc) { lfence(); return (tsc_get_timecount_low(tc)); } static u_int tsc_get_timecount_mfence(struct timecounter *tc __unused) { mfence(); return (rdtsc32()); } static u_int tsc_get_timecount_low_mfence(struct timecounter *tc) { mfence(); return (tsc_get_timecount_low(tc)); } -uint32_t -cpu_fill_vdso_timehands(struct vdso_timehands *vdso_th, struct timecounter *tc) +static uint32_t +x86_tsc_vdso_timehands(struct vdso_timehands *vdso_th, struct timecounter *tc) { + vdso_th->th_algo = VDSO_TH_ALGO_X86_TSC; vdso_th->th_x86_shift = (int)(intptr_t)tc->tc_priv; + vdso_th->th_x86_hpet_idx = 0xffffffff; bzero(vdso_th->th_res, sizeof(vdso_th->th_res)); - return (tc == &tsc_timecounter); + return (1); } #ifdef COMPAT_FREEBSD32 -uint32_t -cpu_fill_vdso_timehands32(struct vdso_timehands32 
*vdso_th32, +static uint32_t +x86_tsc_vdso_timehands32(struct vdso_timehands32 *vdso_th32, struct timecounter *tc) { + vdso_th32->th_algo = VDSO_TH_ALGO_X86_TSC; vdso_th32->th_x86_shift = (int)(intptr_t)tc->tc_priv; + vdso_th32->th_x86_hpet_idx = 0xffffffff; bzero(vdso_th32->th_res, sizeof(vdso_th32->th_res)); - return (tc == &tsc_timecounter); + return (1); } #endif
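Editorial note on the new hook, not part of the change itself: with the
cpu_fill_vdso_timehands() KPI replaced by the per-timecounter
tc_fill_vdso_timehands callbacks above, other counters can now advertise
their own userspace algorithm. A rough driver-side sketch, assuming an
x86 HPET-style counter; the "foo" names and the unit index below are
hypothetical, and only VDSO_TH_ALGO_X86_HPET and th_x86_hpet_idx come
from this change:

static uint32_t
foo_vdso_timehands(struct vdso_timehands *vdso_th, struct timecounter *tc)
{

	/* Select the userspace algorithm for libc's __vdso_gettc(). */
	vdso_th->th_algo = VDSO_TH_ALGO_X86_HPET;
	/* Which mapped counter instance userspace should read. */
	vdso_th->th_x86_hpet_idx = 0;
	bzero(vdso_th->th_res, sizeof(vdso_th->th_res));
	/* Non-zero return enables the fast userspace path. */
	return (1);
}

static struct timecounter foo_timecounter = {
	.tc_get_timecount = foo_get_timecount,
	.tc_counter_mask = ~0u,
	.tc_frequency = 0,	/* set at attach */
	.tc_name = "foo",
	.tc_quality = 900,
	.tc_fill_vdso_timehands = foo_vdso_timehands,
};

A timecounter that leaves the hook NULL keeps working: per the new
tc_fill_vdso_timehands() above, enabled is then 0, so userspace simply
falls back to the syscall path for that counter.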