diff --git a/sys/amd64/include/cpufunc.h b/sys/amd64/include/cpufunc.h index 93e62c0df73a..95086469d51d 100644 --- a/sys/amd64/include/cpufunc.h +++ b/sys/amd64/include/cpufunc.h @@ -1,1066 +1,1080 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 2003 Peter Wemm. * Copyright (c) 1993 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Functions to provide access to special i386 instructions. * This in included in sys/systm.h, and that file should be * used in preference to this. */ #ifndef _MACHINE_CPUFUNC_H_ #define _MACHINE_CPUFUNC_H_ #ifndef _SYS_CDEFS_H_ #error this file needs sys/cdefs.h as a prerequisite #endif struct region_descriptor; #define readb(va) (*(volatile uint8_t *) (va)) #define readw(va) (*(volatile uint16_t *) (va)) #define readl(va) (*(volatile uint32_t *) (va)) #define readq(va) (*(volatile uint64_t *) (va)) #define writeb(va, d) (*(volatile uint8_t *) (va) = (d)) #define writew(va, d) (*(volatile uint16_t *) (va) = (d)) #define writel(va, d) (*(volatile uint32_t *) (va) = (d)) #define writeq(va, d) (*(volatile uint64_t *) (va) = (d)) #if defined(__GNUCLIKE_ASM) && defined(__CC_SUPPORTS___INLINE) static __inline void breakpoint(void) { __asm __volatile("int $3"); } static __inline __pure2 u_int bsfl(u_int mask) { u_int result; __asm __volatile("bsfl %1,%0" : "=r" (result) : "rm" (mask)); return (result); } static __inline __pure2 u_long bsfq(u_long mask) { u_long result; __asm __volatile("bsfq %1,%0" : "=r" (result) : "rm" (mask)); return (result); } static __inline __pure2 u_int bsrl(u_int mask) { u_int result; __asm __volatile("bsrl %1,%0" : "=r" (result) : "rm" (mask)); return (result); } static __inline __pure2 u_long bsrq(u_long mask) { u_long result; __asm __volatile("bsrq %1,%0" : "=r" (result) : "rm" (mask)); return (result); } static __inline void clflush(u_long addr) { __asm __volatile("clflush %0" : : "m" (*(char *)addr)); } static __inline void clflushopt(u_long addr) { __asm __volatile(".byte 0x66;clflush %0" : : "m" (*(char *)addr)); } static __inline void clwb(u_long addr) { __asm __volatile("clwb %0" : : "m" (*(char *)addr)); } static __inline void clts(void) { __asm __volatile("clts"); } static __inline void disable_intr(void) { __asm __volatile("cli" : : : "memory"); } static __inline void do_cpuid(u_int ax, u_int *p) { __asm __volatile("cpuid" : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3]) : "0" (ax)); } static __inline void cpuid_count(u_int ax, u_int cx, u_int *p) { __asm __volatile("cpuid" : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3]) : "0" (ax), "c" (cx)); } static __inline void enable_intr(void) { __asm __volatile("sti"); } #ifdef _KERNEL #define HAVE_INLINE_FFS #define ffs(x) __builtin_ffs(x) #define HAVE_INLINE_FFSL static __inline __pure2 int ffsl(long mask) { return (__builtin_ffsl(mask)); } #define HAVE_INLINE_FFSLL static __inline __pure2 int ffsll(long long mask) { return (ffsl((long)mask)); } #define HAVE_INLINE_FLS static __inline __pure2 int fls(int mask) { return (mask == 0 ? mask : (int)bsrl((u_int)mask) + 1); } #define HAVE_INLINE_FLSL static __inline __pure2 int flsl(long mask) { return (mask == 0 ? mask : (int)bsrq((u_long)mask) + 1); } #define HAVE_INLINE_FLSLL static __inline __pure2 int flsll(long long mask) { return (flsl((long)mask)); } #endif /* _KERNEL */ static __inline void halt(void) { __asm __volatile("hlt"); } static __inline u_char inb(u_int port) { u_char data; __asm __volatile("inb %w1, %0" : "=a" (data) : "Nd" (port)); return (data); } static __inline u_int inl(u_int port) { u_int data; __asm __volatile("inl %w1, %0" : "=a" (data) : "Nd" (port)); return (data); } static __inline void insb(u_int port, void *addr, size_t count) { __asm __volatile("cld; rep; insb" : "+D" (addr), "+c" (count) : "d" (port) : "memory"); } static __inline void insw(u_int port, void *addr, size_t count) { __asm __volatile("cld; rep; insw" : "+D" (addr), "+c" (count) : "d" (port) : "memory"); } static __inline void insl(u_int port, void *addr, size_t count) { __asm __volatile("cld; rep; insl" : "+D" (addr), "+c" (count) : "d" (port) : "memory"); } static __inline void invd(void) { __asm __volatile("invd"); } static __inline u_short inw(u_int port) { u_short data; __asm __volatile("inw %w1, %0" : "=a" (data) : "Nd" (port)); return (data); } static __inline void outb(u_int port, u_char data) { __asm __volatile("outb %0, %w1" : : "a" (data), "Nd" (port)); } static __inline void outl(u_int port, u_int data) { __asm __volatile("outl %0, %w1" : : "a" (data), "Nd" (port)); } static __inline void outsb(u_int port, const void *addr, size_t count) { __asm __volatile("cld; rep; outsb" : "+S" (addr), "+c" (count) : "d" (port)); } static __inline void outsw(u_int port, const void *addr, size_t count) { __asm __volatile("cld; rep; outsw" : "+S" (addr), "+c" (count) : "d" (port)); } static __inline void outsl(u_int port, const void *addr, size_t count) { __asm __volatile("cld; rep; outsl" : "+S" (addr), "+c" (count) : "d" (port)); } static __inline void outw(u_int port, u_short data) { __asm __volatile("outw %0, %w1" : : "a" (data), "Nd" (port)); } static __inline u_long popcntq(u_long mask) { u_long result; __asm __volatile("popcntq %1,%0" : "=r" (result) : "rm" (mask)); return (result); } static __inline void lfence(void) { __asm __volatile("lfence" : : : "memory"); } static __inline void mfence(void) { __asm __volatile("mfence" : : : "memory"); } static __inline void sfence(void) { __asm __volatile("sfence" : : : "memory"); } static __inline void ia32_pause(void) { __asm __volatile("pause"); } static __inline u_long read_rflags(void) { u_long rf; __asm __volatile("pushfq; popq %0" : "=r" (rf)); return (rf); } static __inline uint64_t rdmsr(u_int msr) { uint32_t low, high; __asm __volatile("rdmsr" : "=a" (low), "=d" (high) : "c" (msr)); return (low | ((uint64_t)high << 32)); } static __inline uint32_t rdmsr32(u_int msr) { uint32_t low; __asm __volatile("rdmsr" : "=a" (low) : "c" (msr) : "rdx"); return (low); } static __inline uint64_t rdpmc(u_int pmc) { uint32_t low, high; __asm __volatile("rdpmc" : "=a" (low), "=d" (high) : "c" (pmc)); return (low | ((uint64_t)high << 32)); } static __inline uint64_t rdtsc(void) { uint32_t low, high; __asm __volatile("rdtsc" : "=a" (low), "=d" (high)); return (low | ((uint64_t)high << 32)); } +static __inline uint64_t +rdtsc_ordered_lfence(void) +{ + lfence(); + return (rdtsc()); +} + +static __inline uint64_t +rdtsc_ordered_mfence(void) +{ + mfence(); + return (rdtsc()); +} + static __inline uint64_t rdtscp(void) { uint32_t low, high; __asm __volatile("rdtscp" : "=a" (low), "=d" (high) : : "ecx"); return (low | ((uint64_t)high << 32)); } static __inline uint64_t rdtscp_aux(uint32_t *aux) { uint32_t low, high; __asm __volatile("rdtscp" : "=a" (low), "=d" (high), "=c" (*aux)); return (low | ((uint64_t)high << 32)); } static __inline uint32_t rdtsc32(void) { uint32_t rv; __asm __volatile("rdtsc" : "=a" (rv) : : "edx"); return (rv); } static __inline uint32_t rdtscp32(void) { uint32_t rv; __asm __volatile("rdtscp" : "=a" (rv) : : "ecx", "edx"); return (rv); } static __inline void wbinvd(void) { __asm __volatile("wbinvd"); } static __inline void write_rflags(u_long rf) { __asm __volatile("pushq %0; popfq" : : "r" (rf)); } static __inline void wrmsr(u_int msr, uint64_t newval) { uint32_t low, high; low = newval; high = newval >> 32; __asm __volatile("wrmsr" : : "a" (low), "d" (high), "c" (msr)); } static __inline void load_cr0(u_long data) { __asm __volatile("movq %0,%%cr0" : : "r" (data)); } static __inline u_long rcr0(void) { u_long data; __asm __volatile("movq %%cr0,%0" : "=r" (data)); return (data); } static __inline u_long rcr2(void) { u_long data; __asm __volatile("movq %%cr2,%0" : "=r" (data)); return (data); } static __inline void load_cr3(u_long data) { __asm __volatile("movq %0,%%cr3" : : "r" (data) : "memory"); } static __inline u_long rcr3(void) { u_long data; __asm __volatile("movq %%cr3,%0" : "=r" (data)); return (data); } static __inline void load_cr4(u_long data) { __asm __volatile("movq %0,%%cr4" : : "r" (data)); } static __inline u_long rcr4(void) { u_long data; __asm __volatile("movq %%cr4,%0" : "=r" (data)); return (data); } static __inline u_long rxcr(u_int reg) { u_int low, high; __asm __volatile("xgetbv" : "=a" (low), "=d" (high) : "c" (reg)); return (low | ((uint64_t)high << 32)); } static __inline void load_xcr(u_int reg, u_long val) { u_int low, high; low = val; high = val >> 32; __asm __volatile("xsetbv" : : "c" (reg), "a" (low), "d" (high)); } /* * Global TLB flush (except for thise for pages marked PG_G) */ static __inline void invltlb(void) { load_cr3(rcr3()); } #ifndef CR4_PGE #define CR4_PGE 0x00000080 /* Page global enable */ #endif /* * Perform the guaranteed invalidation of all TLB entries. This * includes the global entries, and entries in all PCIDs, not only the * current context. The function works both on non-PCID CPUs and CPUs * with the PCID turned off or on. See IA-32 SDM Vol. 3a 4.10.4.1 * Operations that Invalidate TLBs and Paging-Structure Caches. */ static __inline void invltlb_glob(void) { uint64_t cr4; cr4 = rcr4(); load_cr4(cr4 & ~CR4_PGE); /* * Although preemption at this point could be detrimental to * performance, it would not lead to an error. PG_G is simply * ignored if CR4.PGE is clear. Moreover, in case this block * is re-entered, the load_cr4() either above or below will * modify CR4.PGE flushing the TLB. */ load_cr4(cr4 | CR4_PGE); } /* * TLB flush for an individual page (even if it has PG_G). * Only works on 486+ CPUs (i386 does not have PG_G). */ static __inline void invlpg(u_long addr) { __asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory"); } #define INVPCID_ADDR 0 #define INVPCID_CTX 1 #define INVPCID_CTXGLOB 2 #define INVPCID_ALLCTX 3 struct invpcid_descr { uint64_t pcid:12 __packed; uint64_t pad:52 __packed; uint64_t addr; } __packed; static __inline void invpcid(struct invpcid_descr *d, int type) { __asm __volatile("invpcid (%0),%1" : : "r" (d), "r" ((u_long)type) : "memory"); } static __inline u_short rfs(void) { u_short sel; __asm __volatile("movw %%fs,%0" : "=rm" (sel)); return (sel); } static __inline u_short rgs(void) { u_short sel; __asm __volatile("movw %%gs,%0" : "=rm" (sel)); return (sel); } static __inline u_short rss(void) { u_short sel; __asm __volatile("movw %%ss,%0" : "=rm" (sel)); return (sel); } static __inline void load_ds(u_short sel) { __asm __volatile("movw %0,%%ds" : : "rm" (sel)); } static __inline void load_es(u_short sel) { __asm __volatile("movw %0,%%es" : : "rm" (sel)); } static __inline void cpu_monitor(const void *addr, u_long extensions, u_int hints) { __asm __volatile("monitor" : : "a" (addr), "c" (extensions), "d" (hints)); } static __inline void cpu_mwait(u_long extensions, u_int hints) { __asm __volatile("mwait" : : "a" (hints), "c" (extensions)); } static __inline uint32_t rdpkru(void) { uint32_t res; __asm __volatile("rdpkru" : "=a" (res) : "c" (0) : "edx"); return (res); } static __inline void wrpkru(uint32_t mask) { __asm __volatile("wrpkru" : : "a" (mask), "c" (0), "d" (0)); } #ifdef _KERNEL /* This is defined in but is too painful to get to */ #ifndef MSR_FSBASE #define MSR_FSBASE 0xc0000100 #endif static __inline void load_fs(u_short sel) { /* Preserve the fsbase value across the selector load */ __asm __volatile("rdmsr; movw %0,%%fs; wrmsr" : : "rm" (sel), "c" (MSR_FSBASE) : "eax", "edx"); } #ifndef MSR_GSBASE #define MSR_GSBASE 0xc0000101 #endif static __inline void load_gs(u_short sel) { /* * Preserve the gsbase value across the selector load. * Note that we have to disable interrupts because the gsbase * being trashed happens to be the kernel gsbase at the time. */ __asm __volatile("pushfq; cli; rdmsr; movw %0,%%gs; wrmsr; popfq" : : "rm" (sel), "c" (MSR_GSBASE) : "eax", "edx"); } #else /* Usable by userland */ static __inline void load_fs(u_short sel) { __asm __volatile("movw %0,%%fs" : : "rm" (sel)); } static __inline void load_gs(u_short sel) { __asm __volatile("movw %0,%%gs" : : "rm" (sel)); } #endif static __inline uint64_t rdfsbase(void) { uint64_t x; __asm __volatile("rdfsbase %0" : "=r" (x)); return (x); } static __inline void wrfsbase(uint64_t x) { __asm __volatile("wrfsbase %0" : : "r" (x)); } static __inline uint64_t rdgsbase(void) { uint64_t x; __asm __volatile("rdgsbase %0" : "=r" (x)); return (x); } static __inline void wrgsbase(uint64_t x) { __asm __volatile("wrgsbase %0" : : "r" (x)); } static __inline void bare_lgdt(struct region_descriptor *addr) { __asm __volatile("lgdt (%0)" : : "r" (addr)); } static __inline void sgdt(struct region_descriptor *addr) { char *loc; loc = (char *)addr; __asm __volatile("sgdt %0" : "=m" (*loc) : : "memory"); } static __inline void lidt(struct region_descriptor *addr) { __asm __volatile("lidt (%0)" : : "r" (addr)); } static __inline void sidt(struct region_descriptor *addr) { char *loc; loc = (char *)addr; __asm __volatile("sidt %0" : "=m" (*loc) : : "memory"); } static __inline void lldt(u_short sel) { __asm __volatile("lldt %0" : : "r" (sel)); } static __inline u_short sldt(void) { u_short sel; __asm __volatile("sldt %0" : "=r" (sel)); return (sel); } static __inline void ltr(u_short sel) { __asm __volatile("ltr %0" : : "r" (sel)); } static __inline uint32_t read_tr(void) { u_short sel; __asm __volatile("str %0" : "=r" (sel)); return (sel); } static __inline uint64_t rdr0(void) { uint64_t data; __asm __volatile("movq %%dr0,%0" : "=r" (data)); return (data); } static __inline void load_dr0(uint64_t dr0) { __asm __volatile("movq %0,%%dr0" : : "r" (dr0)); } static __inline uint64_t rdr1(void) { uint64_t data; __asm __volatile("movq %%dr1,%0" : "=r" (data)); return (data); } static __inline void load_dr1(uint64_t dr1) { __asm __volatile("movq %0,%%dr1" : : "r" (dr1)); } static __inline uint64_t rdr2(void) { uint64_t data; __asm __volatile("movq %%dr2,%0" : "=r" (data)); return (data); } static __inline void load_dr2(uint64_t dr2) { __asm __volatile("movq %0,%%dr2" : : "r" (dr2)); } static __inline uint64_t rdr3(void) { uint64_t data; __asm __volatile("movq %%dr3,%0" : "=r" (data)); return (data); } static __inline void load_dr3(uint64_t dr3) { __asm __volatile("movq %0,%%dr3" : : "r" (dr3)); } static __inline uint64_t rdr6(void) { uint64_t data; __asm __volatile("movq %%dr6,%0" : "=r" (data)); return (data); } static __inline void load_dr6(uint64_t dr6) { __asm __volatile("movq %0,%%dr6" : : "r" (dr6)); } static __inline uint64_t rdr7(void) { uint64_t data; __asm __volatile("movq %%dr7,%0" : "=r" (data)); return (data); } static __inline void load_dr7(uint64_t dr7) { __asm __volatile("movq %0,%%dr7" : : "r" (dr7)); } static __inline register_t intr_disable(void) { register_t rflags; rflags = read_rflags(); disable_intr(); return (rflags); } static __inline void intr_restore(register_t rflags) { write_rflags(rflags); } static __inline void stac(void) { __asm __volatile("stac" : : : "cc"); } static __inline void clac(void) { __asm __volatile("clac" : : : "cc"); } enum { SGX_ECREATE = 0x0, SGX_EADD = 0x1, SGX_EINIT = 0x2, SGX_EREMOVE = 0x3, SGX_EDGBRD = 0x4, SGX_EDGBWR = 0x5, SGX_EEXTEND = 0x6, SGX_ELDU = 0x8, SGX_EBLOCK = 0x9, SGX_EPA = 0xA, SGX_EWB = 0xB, SGX_ETRACK = 0xC, }; enum { SGX_PT_SECS = 0x00, SGX_PT_TCS = 0x01, SGX_PT_REG = 0x02, SGX_PT_VA = 0x03, SGX_PT_TRIM = 0x04, }; int sgx_encls(uint32_t eax, uint64_t rbx, uint64_t rcx, uint64_t rdx); static __inline int sgx_ecreate(void *pginfo, void *secs) { return (sgx_encls(SGX_ECREATE, (uint64_t)pginfo, (uint64_t)secs, 0)); } static __inline int sgx_eadd(void *pginfo, void *epc) { return (sgx_encls(SGX_EADD, (uint64_t)pginfo, (uint64_t)epc, 0)); } static __inline int sgx_einit(void *sigstruct, void *secs, void *einittoken) { return (sgx_encls(SGX_EINIT, (uint64_t)sigstruct, (uint64_t)secs, (uint64_t)einittoken)); } static __inline int sgx_eextend(void *secs, void *epc) { return (sgx_encls(SGX_EEXTEND, (uint64_t)secs, (uint64_t)epc, 0)); } static __inline int sgx_epa(void *epc) { return (sgx_encls(SGX_EPA, SGX_PT_VA, (uint64_t)epc, 0)); } static __inline int sgx_eldu(uint64_t rbx, uint64_t rcx, uint64_t rdx) { return (sgx_encls(SGX_ELDU, rbx, rcx, rdx)); } static __inline int sgx_eremove(void *epc) { return (sgx_encls(SGX_EREMOVE, 0, (uint64_t)epc, 0)); } #else /* !(__GNUCLIKE_ASM && __CC_SUPPORTS___INLINE) */ int breakpoint(void); u_int bsfl(u_int mask); u_int bsrl(u_int mask); void clflush(u_long addr); void clts(void); void cpuid_count(u_int ax, u_int cx, u_int *p); void disable_intr(void); void do_cpuid(u_int ax, u_int *p); void enable_intr(void); void halt(void); void ia32_pause(void); u_char inb(u_int port); u_int inl(u_int port); void insb(u_int port, void *addr, size_t count); void insl(u_int port, void *addr, size_t count); void insw(u_int port, void *addr, size_t count); register_t intr_disable(void); void intr_restore(register_t rf); void invd(void); void invlpg(u_int addr); void invltlb(void); u_short inw(u_int port); void lidt(struct region_descriptor *addr); void lldt(u_short sel); void load_cr0(u_long cr0); void load_cr3(u_long cr3); void load_cr4(u_long cr4); void load_dr0(uint64_t dr0); void load_dr1(uint64_t dr1); void load_dr2(uint64_t dr2); void load_dr3(uint64_t dr3); void load_dr6(uint64_t dr6); void load_dr7(uint64_t dr7); void load_fs(u_short sel); void load_gs(u_short sel); void ltr(u_short sel); void outb(u_int port, u_char data); void outl(u_int port, u_int data); void outsb(u_int port, const void *addr, size_t count); void outsl(u_int port, const void *addr, size_t count); void outsw(u_int port, const void *addr, size_t count); void outw(u_int port, u_short data); u_long rcr0(void); u_long rcr2(void); u_long rcr3(void); u_long rcr4(void); uint64_t rdmsr(u_int msr); uint32_t rdmsr32(u_int msr); uint64_t rdpmc(u_int pmc); uint64_t rdr0(void); uint64_t rdr1(void); uint64_t rdr2(void); uint64_t rdr3(void); uint64_t rdr6(void); uint64_t rdr7(void); uint64_t rdtsc(void); u_long read_rflags(void); u_int rfs(void); u_int rgs(void); void wbinvd(void); void write_rflags(u_int rf); void wrmsr(u_int msr, uint64_t newval); #endif /* __GNUCLIKE_ASM && __CC_SUPPORTS___INLINE */ void reset_dbregs(void); #ifdef _KERNEL int rdmsr_safe(u_int msr, uint64_t *val); int wrmsr_safe(u_int msr, uint64_t newval); #endif #endif /* !_MACHINE_CPUFUNC_H_ */ diff --git a/sys/i386/include/cpufunc.h b/sys/i386/include/cpufunc.h index 7b99978facc3..79cdd3004b77 100644 --- a/sys/i386/include/cpufunc.h +++ b/sys/i386/include/cpufunc.h @@ -1,842 +1,856 @@ /*- * SPDX-License-Identifier: BSD-3-Clause * * Copyright (c) 1993 The Regents of the University of California. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* * Functions to provide access to special i386 instructions. * This in included in sys/systm.h, and that file should be * used in preference to this. */ #ifndef _MACHINE_CPUFUNC_H_ #define _MACHINE_CPUFUNC_H_ #ifndef _SYS_CDEFS_H_ #error this file needs sys/cdefs.h as a prerequisite #endif struct region_descriptor; #define readb(va) (*(volatile uint8_t *) (va)) #define readw(va) (*(volatile uint16_t *) (va)) #define readl(va) (*(volatile uint32_t *) (va)) #define writeb(va, d) (*(volatile uint8_t *) (va) = (d)) #define writew(va, d) (*(volatile uint16_t *) (va) = (d)) #define writel(va, d) (*(volatile uint32_t *) (va) = (d)) #if defined(__GNUCLIKE_ASM) && defined(__CC_SUPPORTS___INLINE) static __inline void breakpoint(void) { __asm __volatile("int $3"); } static __inline __pure2 u_int bsfl(u_int mask) { u_int result; __asm("bsfl %1,%0" : "=r" (result) : "rm" (mask) : "cc"); return (result); } static __inline __pure2 u_int bsrl(u_int mask) { u_int result; __asm("bsrl %1,%0" : "=r" (result) : "rm" (mask) : "cc"); return (result); } static __inline void clflush(u_long addr) { __asm __volatile("clflush %0" : : "m" (*(char *)addr)); } static __inline void clflushopt(u_long addr) { __asm __volatile(".byte 0x66;clflush %0" : : "m" (*(char *)addr)); } static __inline void clts(void) { __asm __volatile("clts"); } static __inline void disable_intr(void) { __asm __volatile("cli" : : : "memory"); } #ifdef _KERNEL static __inline void do_cpuid(u_int ax, u_int *p) { __asm __volatile("cpuid" : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3]) : "0" (ax)); } static __inline void cpuid_count(u_int ax, u_int cx, u_int *p) { __asm __volatile("cpuid" : "=a" (p[0]), "=b" (p[1]), "=c" (p[2]), "=d" (p[3]) : "0" (ax), "c" (cx)); } #else static __inline void do_cpuid(u_int ax, u_int *p) { __asm __volatile( "pushl\t%%ebx\n\t" "cpuid\n\t" "movl\t%%ebx,%1\n\t" "popl\t%%ebx" : "=a" (p[0]), "=DS" (p[1]), "=c" (p[2]), "=d" (p[3]) : "0" (ax)); } static __inline void cpuid_count(u_int ax, u_int cx, u_int *p) { __asm __volatile( "pushl\t%%ebx\n\t" "cpuid\n\t" "movl\t%%ebx,%1\n\t" "popl\t%%ebx" : "=a" (p[0]), "=DS" (p[1]), "=c" (p[2]), "=d" (p[3]) : "0" (ax), "c" (cx)); } #endif static __inline void enable_intr(void) { __asm __volatile("sti"); } static __inline void cpu_monitor(const void *addr, u_long extensions, u_int hints) { __asm __volatile("monitor" : : "a" (addr), "c" (extensions), "d" (hints)); } static __inline void cpu_mwait(u_long extensions, u_int hints) { __asm __volatile("mwait" : : "a" (hints), "c" (extensions)); } static __inline void lfence(void) { __asm __volatile("lfence" : : : "memory"); } static __inline void mfence(void) { __asm __volatile("mfence" : : : "memory"); } static __inline void sfence(void) { __asm __volatile("sfence" : : : "memory"); } #ifdef _KERNEL #define HAVE_INLINE_FFS static __inline __pure2 int ffs(int mask) { /* * Note that gcc-2's builtin ffs would be used if we didn't declare * this inline or turn off the builtin. The builtin is faster but * broken in gcc-2.4.5 and slower but working in gcc-2.5 and later * versions. */ return (mask == 0 ? mask : (int)bsfl((u_int)mask) + 1); } #define HAVE_INLINE_FFSL static __inline __pure2 int ffsl(long mask) { return (ffs((int)mask)); } #define HAVE_INLINE_FLS static __inline __pure2 int fls(int mask) { return (mask == 0 ? mask : (int)bsrl((u_int)mask) + 1); } #define HAVE_INLINE_FLSL static __inline __pure2 int flsl(long mask) { return (fls((int)mask)); } #endif /* _KERNEL */ static __inline void halt(void) { __asm __volatile("hlt"); } static __inline u_char inb(u_int port) { u_char data; __asm __volatile("inb %w1, %0" : "=a" (data) : "Nd" (port)); return (data); } static __inline u_int inl(u_int port) { u_int data; __asm __volatile("inl %w1, %0" : "=a" (data) : "Nd" (port)); return (data); } static __inline void insb(u_int port, void *addr, size_t count) { __asm __volatile("cld; rep; insb" : "+D" (addr), "+c" (count) : "d" (port) : "memory"); } static __inline void insw(u_int port, void *addr, size_t count) { __asm __volatile("cld; rep; insw" : "+D" (addr), "+c" (count) : "d" (port) : "memory"); } static __inline void insl(u_int port, void *addr, size_t count) { __asm __volatile("cld; rep; insl" : "+D" (addr), "+c" (count) : "d" (port) : "memory"); } static __inline void invd(void) { __asm __volatile("invd"); } static __inline u_short inw(u_int port) { u_short data; __asm __volatile("inw %w1, %0" : "=a" (data) : "Nd" (port)); return (data); } static __inline void outb(u_int port, u_char data) { __asm __volatile("outb %0, %w1" : : "a" (data), "Nd" (port)); } static __inline void outl(u_int port, u_int data) { __asm __volatile("outl %0, %w1" : : "a" (data), "Nd" (port)); } static __inline void outsb(u_int port, const void *addr, size_t count) { __asm __volatile("cld; rep; outsb" : "+S" (addr), "+c" (count) : "d" (port)); } static __inline void outsw(u_int port, const void *addr, size_t count) { __asm __volatile("cld; rep; outsw" : "+S" (addr), "+c" (count) : "d" (port)); } static __inline void outsl(u_int port, const void *addr, size_t count) { __asm __volatile("cld; rep; outsl" : "+S" (addr), "+c" (count) : "d" (port)); } static __inline void outw(u_int port, u_short data) { __asm __volatile("outw %0, %w1" : : "a" (data), "Nd" (port)); } static __inline void ia32_pause(void) { __asm __volatile("pause"); } static __inline u_int read_eflags(void) { u_int ef; __asm __volatile("pushfl; popl %0" : "=r" (ef)); return (ef); } static __inline uint64_t rdmsr(u_int msr) { uint64_t rv; __asm __volatile("rdmsr" : "=A" (rv) : "c" (msr)); return (rv); } static __inline uint32_t rdmsr32(u_int msr) { uint32_t low; __asm __volatile("rdmsr" : "=a" (low) : "c" (msr) : "edx"); return (low); } static __inline uint64_t rdpmc(u_int pmc) { uint64_t rv; __asm __volatile("rdpmc" : "=A" (rv) : "c" (pmc)); return (rv); } static __inline uint64_t rdtsc(void) { uint64_t rv; __asm __volatile("rdtsc" : "=A" (rv)); return (rv); } +static __inline uint64_t +rdtsc_ordered_lfence(void) +{ + lfence(); + return (rdtsc()); +} + +static __inline uint64_t +rdtsc_ordered_mfence(void) +{ + mfence(); + return (rdtsc()); +} + static __inline uint64_t rdtscp(void) { uint64_t rv; __asm __volatile("rdtscp" : "=A" (rv) : : "ecx"); return (rv); } static __inline uint64_t rdtscp_aux(uint32_t *aux) { uint64_t rv; __asm __volatile("rdtscp" : "=A" (rv), "=c" (*aux)); return (rv); } static __inline uint32_t rdtsc32(void) { uint32_t rv; __asm __volatile("rdtsc" : "=a" (rv) : : "edx"); return (rv); } static __inline uint32_t rdtscp32(void) { uint32_t rv; __asm __volatile("rdtscp" : "=a" (rv) : : "ecx", "edx"); return (rv); } static __inline void wbinvd(void) { __asm __volatile("wbinvd"); } static __inline void write_eflags(u_int ef) { __asm __volatile("pushl %0; popfl" : : "r" (ef)); } static __inline void wrmsr(u_int msr, uint64_t newval) { __asm __volatile("wrmsr" : : "A" (newval), "c" (msr)); } static __inline void load_cr0(u_int data) { __asm __volatile("movl %0,%%cr0" : : "r" (data)); } static __inline u_int rcr0(void) { u_int data; __asm __volatile("movl %%cr0,%0" : "=r" (data)); return (data); } static __inline u_int rcr2(void) { u_int data; __asm __volatile("movl %%cr2,%0" : "=r" (data)); return (data); } static __inline void load_cr3(u_int data) { __asm __volatile("movl %0,%%cr3" : : "r" (data) : "memory"); } static __inline u_int rcr3(void) { u_int data; __asm __volatile("movl %%cr3,%0" : "=r" (data)); return (data); } static __inline void load_cr4(u_int data) { __asm __volatile("movl %0,%%cr4" : : "r" (data)); } static __inline u_int rcr4(void) { u_int data; __asm __volatile("movl %%cr4,%0" : "=r" (data)); return (data); } static __inline uint64_t rxcr(u_int reg) { u_int low, high; __asm __volatile("xgetbv" : "=a" (low), "=d" (high) : "c" (reg)); return (low | ((uint64_t)high << 32)); } static __inline void load_xcr(u_int reg, uint64_t val) { u_int low, high; low = val; high = val >> 32; __asm __volatile("xsetbv" : : "c" (reg), "a" (low), "d" (high)); } /* * Global TLB flush (except for thise for pages marked PG_G) */ static __inline void invltlb(void) { load_cr3(rcr3()); } /* * TLB flush for an individual page (even if it has PG_G). * Only works on 486+ CPUs (i386 does not have PG_G). */ static __inline void invlpg(u_int addr) { __asm __volatile("invlpg %0" : : "m" (*(char *)addr) : "memory"); } static __inline u_short rfs(void) { u_short sel; __asm __volatile("movw %%fs,%0" : "=rm" (sel)); return (sel); } static __inline uint64_t rgdt(void) { uint64_t gdtr; __asm __volatile("sgdt %0" : "=m" (gdtr)); return (gdtr); } static __inline u_short rgs(void) { u_short sel; __asm __volatile("movw %%gs,%0" : "=rm" (sel)); return (sel); } static __inline uint64_t ridt(void) { uint64_t idtr; __asm __volatile("sidt %0" : "=m" (idtr)); return (idtr); } static __inline u_short rldt(void) { u_short ldtr; __asm __volatile("sldt %0" : "=g" (ldtr)); return (ldtr); } static __inline u_short rss(void) { u_short sel; __asm __volatile("movw %%ss,%0" : "=rm" (sel)); return (sel); } static __inline u_short rtr(void) { u_short tr; __asm __volatile("str %0" : "=g" (tr)); return (tr); } static __inline void load_fs(u_short sel) { __asm __volatile("movw %0,%%fs" : : "rm" (sel)); } static __inline void load_gs(u_short sel) { __asm __volatile("movw %0,%%gs" : : "rm" (sel)); } static __inline void lidt(struct region_descriptor *addr) { __asm __volatile("lidt (%0)" : : "r" (addr)); } static __inline void lldt(u_short sel) { __asm __volatile("lldt %0" : : "r" (sel)); } static __inline void ltr(u_short sel) { __asm __volatile("ltr %0" : : "r" (sel)); } static __inline u_int rdr0(void) { u_int data; __asm __volatile("movl %%dr0,%0" : "=r" (data)); return (data); } static __inline void load_dr0(u_int dr0) { __asm __volatile("movl %0,%%dr0" : : "r" (dr0)); } static __inline u_int rdr1(void) { u_int data; __asm __volatile("movl %%dr1,%0" : "=r" (data)); return (data); } static __inline void load_dr1(u_int dr1) { __asm __volatile("movl %0,%%dr1" : : "r" (dr1)); } static __inline u_int rdr2(void) { u_int data; __asm __volatile("movl %%dr2,%0" : "=r" (data)); return (data); } static __inline void load_dr2(u_int dr2) { __asm __volatile("movl %0,%%dr2" : : "r" (dr2)); } static __inline u_int rdr3(void) { u_int data; __asm __volatile("movl %%dr3,%0" : "=r" (data)); return (data); } static __inline void load_dr3(u_int dr3) { __asm __volatile("movl %0,%%dr3" : : "r" (dr3)); } static __inline u_int rdr6(void) { u_int data; __asm __volatile("movl %%dr6,%0" : "=r" (data)); return (data); } static __inline void load_dr6(u_int dr6) { __asm __volatile("movl %0,%%dr6" : : "r" (dr6)); } static __inline u_int rdr7(void) { u_int data; __asm __volatile("movl %%dr7,%0" : "=r" (data)); return (data); } static __inline void load_dr7(u_int dr7) { __asm __volatile("movl %0,%%dr7" : : "r" (dr7)); } static __inline u_char read_cyrix_reg(u_char reg) { outb(0x22, reg); return inb(0x23); } static __inline void write_cyrix_reg(u_char reg, u_char data) { outb(0x22, reg); outb(0x23, data); } static __inline register_t intr_disable(void) { register_t eflags; eflags = read_eflags(); disable_intr(); return (eflags); } static __inline void intr_restore(register_t eflags) { write_eflags(eflags); } static __inline uint32_t rdpkru(void) { uint32_t res; __asm __volatile("rdpkru" : "=a" (res) : "c" (0) : "edx"); return (res); } static __inline void wrpkru(uint32_t mask) { __asm __volatile("wrpkru" : : "a" (mask), "c" (0), "d" (0)); } #else /* !(__GNUCLIKE_ASM && __CC_SUPPORTS___INLINE) */ int breakpoint(void); u_int bsfl(u_int mask); u_int bsrl(u_int mask); void clflush(u_long addr); void clts(void); void cpuid_count(u_int ax, u_int cx, u_int *p); void disable_intr(void); void do_cpuid(u_int ax, u_int *p); void enable_intr(void); void halt(void); void ia32_pause(void); u_char inb(u_int port); u_int inl(u_int port); void insb(u_int port, void *addr, size_t count); void insl(u_int port, void *addr, size_t count); void insw(u_int port, void *addr, size_t count); register_t intr_disable(void); void intr_restore(register_t ef); void invd(void); void invlpg(u_int addr); void invltlb(void); u_short inw(u_int port); void lidt(struct region_descriptor *addr); void lldt(u_short sel); void load_cr0(u_int cr0); void load_cr3(u_int cr3); void load_cr4(u_int cr4); void load_dr0(u_int dr0); void load_dr1(u_int dr1); void load_dr2(u_int dr2); void load_dr3(u_int dr3); void load_dr6(u_int dr6); void load_dr7(u_int dr7); void load_fs(u_short sel); void load_gs(u_short sel); void ltr(u_short sel); void outb(u_int port, u_char data); void outl(u_int port, u_int data); void outsb(u_int port, const void *addr, size_t count); void outsl(u_int port, const void *addr, size_t count); void outsw(u_int port, const void *addr, size_t count); void outw(u_int port, u_short data); u_int rcr0(void); u_int rcr2(void); u_int rcr3(void); u_int rcr4(void); uint64_t rdmsr(u_int msr); uint64_t rdpmc(u_int pmc); u_int rdr0(void); u_int rdr1(void); u_int rdr2(void); u_int rdr3(void); u_int rdr6(void); u_int rdr7(void); uint64_t rdtsc(void); u_char read_cyrix_reg(u_char reg); u_int read_eflags(void); u_int rfs(void); uint64_t rgdt(void); u_int rgs(void); uint64_t ridt(void); u_short rldt(void); u_short rtr(void); void wbinvd(void); void write_cyrix_reg(u_char reg, u_char data); void write_eflags(u_int ef); void wrmsr(u_int msr, uint64_t newval); #endif /* __GNUCLIKE_ASM && __CC_SUPPORTS___INLINE */ void reset_dbregs(void); #ifdef _KERNEL int rdmsr_safe(u_int msr, uint64_t *val); int wrmsr_safe(u_int msr, uint64_t newval); #endif #endif /* !_MACHINE_CPUFUNC_H_ */ diff --git a/sys/x86/include/x86_var.h b/sys/x86/include/x86_var.h index 3e146b44338f..5b8e41bd76d7 100644 --- a/sys/x86/include/x86_var.h +++ b/sys/x86/include/x86_var.h @@ -1,171 +1,172 @@ /*- * Copyright (c) 1995 Bruce D. Evans. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. Neither the name of the author nor the names of contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ #ifndef _X86_X86_VAR_H_ #define _X86_X86_VAR_H_ /* * Miscellaneous machine-dependent declarations. */ extern long Maxmem; extern u_int basemem; extern int busdma_swi_pending; extern u_int cpu_exthigh; extern u_int cpu_feature; extern u_int cpu_feature2; extern u_int amd_feature; extern u_int amd_feature2; extern u_int amd_rascap; extern u_int amd_pminfo; extern u_int amd_extended_feature_extensions; extern u_int via_feature_rng; extern u_int via_feature_xcrypt; extern u_int cpu_clflush_line_size; extern u_int cpu_stdext_feature; extern u_int cpu_stdext_feature2; extern u_int cpu_stdext_feature3; extern uint64_t cpu_ia32_arch_caps; extern u_int cpu_fxsr; extern u_int cpu_high; extern u_int cpu_id; extern u_int cpu_max_ext_state_size; extern u_int cpu_mxcsr_mask; extern u_int cpu_procinfo; extern u_int cpu_procinfo2; extern char cpu_vendor[]; extern u_int cpu_vendor_id; extern u_int cpu_mon_mwait_flags; extern u_int cpu_mon_min_size; extern u_int cpu_mon_max_size; extern u_int cpu_maxphyaddr; extern u_int hv_high; extern char hv_vendor[]; extern char kstack[]; extern char sigcode[]; extern int szsigcode; extern int vm_page_dump_size; extern int workaround_erratum383; extern int _udatasel; extern int _ucodesel; extern int _ucode32sel; extern int _ufssel; extern int _ugssel; extern int use_xsave; extern uint64_t xsave_mask; extern u_int max_apic_id; extern int pti; extern int hw_ibrs_ibpb_active; extern int hw_mds_disable; extern int hw_ssb_active; extern int x86_taa_enable; extern int cpu_flush_rsb_ctxsw; extern int x86_rngds_mitg_enable; extern int cpu_amdc1e_bug; struct pcb; struct thread; struct reg; struct fpreg; struct dbreg; struct dumperinfo; struct trapframe; /* * The interface type of the interrupt handler entry point cannot be * expressed in C. Use simplest non-variadic function type as an * approximation. */ typedef void alias_for_inthand_t(void); /* * Returns the maximum physical address that can be used with the * current system. */ static __inline vm_paddr_t cpu_getmaxphyaddr(void) { #if defined(__i386__) && !defined(PAE) return (0xffffffff); #else return ((1ULL << cpu_maxphyaddr) - 1); #endif } bool acpi_get_fadt_bootflags(uint16_t *flagsp); void *alloc_fpusave(int flags); void busdma_swi(void); u_int cpu_auxmsr(void); bool cpu_mwait_usable(void); void cpu_probe_amdc1e(void); void cpu_setregs(void); void x86_clear_dbregs(struct pcb *pcb); bool disable_wp(void); void restore_wp(bool old_wp); void dump_add_page(vm_paddr_t); void dump_drop_page(vm_paddr_t); void finishidentcpu(void); void identify_cpu1(void); void identify_cpu2(void); void identify_cpu_fixup_bsp(void); void identify_hypervisor(void); void initializecpu(void); void initializecpucache(void); bool fix_cpuid(void); void fillw(int /*u_short*/ pat, void *base, size_t cnt); int is_physical_memory(vm_paddr_t addr); int isa_nmi(int cd); void handle_ibrs_entry(void); void handle_ibrs_exit(void); void hw_ibrs_recalculate(bool all_cpus); void hw_mds_recalculate(void); void hw_ssb_recalculate(bool all_cpus); void x86_taa_recalculate(void); void x86_rngds_mitg_recalculate(bool all_cpus); void nmi_call_kdb(u_int cpu, u_int type, struct trapframe *frame); void nmi_call_kdb_smp(u_int type, struct trapframe *frame); void nmi_handle_intr(u_int type, struct trapframe *frame); void pagecopy(void *from, void *to); void printcpuinfo(void); int pti_get_default(void); int user_dbreg_trap(register_t dr6); int minidumpsys(struct dumperinfo *); struct pcb *get_pcb_td(struct thread *td); +uint64_t rdtsc_ordered(void); #define MSR_OP_ANDNOT 0x00000001 #define MSR_OP_OR 0x00000002 #define MSR_OP_WRITE 0x00000003 #define MSR_OP_LOCAL 0x10000000 #define MSR_OP_SCHED 0x20000000 #define MSR_OP_RENDEZVOUS 0x30000000 void x86_msr_op(u_int msr, u_int op, uint64_t arg1); #endif diff --git a/sys/x86/x86/cpu_machdep.c b/sys/x86/x86/cpu_machdep.c index 8bceaee913db..26423228d5dc 100644 --- a/sys/x86/x86/cpu_machdep.c +++ b/sys/x86/x86/cpu_machdep.c @@ -1,1423 +1,1439 @@ /*- * Copyright (c) 2003 Peter Wemm. * Copyright (c) 1992 Terrence R. Lambert. * Copyright (c) 1982, 1987, 1990 The Regents of the University of California. * All rights reserved. * * This code is derived from software contributed to Berkeley by * William Jolitz. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. All advertising materials mentioning features or use of this software * must display the following acknowledgement: * This product includes software developed by the University of * California, Berkeley and its contributors. * 4. Neither the name of the University nor the names of its contributors * may be used to endorse or promote products derived from this software * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * from: @(#)machdep.c 7.4 (Berkeley) 6/3/91 */ #include __FBSDID("$FreeBSD$"); #include "opt_acpi.h" #include "opt_atpic.h" #include "opt_cpu.h" #include "opt_ddb.h" #include "opt_inet.h" #include "opt_isa.h" #include "opt_kdb.h" #include "opt_kstack_pages.h" #include "opt_maxmem.h" #include "opt_mp_watchdog.h" #include "opt_platform.h" #ifdef __i386__ #include "opt_apic.h" #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include +#include #include #include #include #include #include #ifdef SMP #include #endif #ifdef CPU_ELAN #include #endif #include +#include #include #include #include #include #include #include #include #include #include #include #define STATE_RUNNING 0x0 #define STATE_MWAIT 0x1 #define STATE_SLEEPING 0x2 #ifdef SMP static u_int cpu_reset_proxyid; static volatile u_int cpu_reset_proxy_active; #endif struct msr_op_arg { u_int msr; int op; uint64_t arg1; }; static void x86_msr_op_one(void *argp) { struct msr_op_arg *a; uint64_t v; a = argp; switch (a->op) { case MSR_OP_ANDNOT: v = rdmsr(a->msr); v &= ~a->arg1; wrmsr(a->msr, v); break; case MSR_OP_OR: v = rdmsr(a->msr); v |= a->arg1; wrmsr(a->msr, v); break; case MSR_OP_WRITE: wrmsr(a->msr, a->arg1); break; } } #define MSR_OP_EXMODE_MASK 0xf0000000 #define MSR_OP_OP_MASK 0x000000ff void x86_msr_op(u_int msr, u_int op, uint64_t arg1) { struct thread *td; struct msr_op_arg a; u_int exmode; int bound_cpu, i, is_bound; a.op = op & MSR_OP_OP_MASK; MPASS(a.op == MSR_OP_ANDNOT || a.op == MSR_OP_OR || a.op == MSR_OP_WRITE); exmode = op & MSR_OP_EXMODE_MASK; MPASS(exmode == MSR_OP_LOCAL || exmode == MSR_OP_SCHED || exmode == MSR_OP_RENDEZVOUS); a.msr = msr; a.arg1 = arg1; switch (exmode) { case MSR_OP_LOCAL: x86_msr_op_one(&a); break; case MSR_OP_SCHED: td = curthread; thread_lock(td); is_bound = sched_is_bound(td); bound_cpu = td->td_oncpu; CPU_FOREACH(i) { sched_bind(td, i); x86_msr_op_one(&a); } if (is_bound) sched_bind(td, bound_cpu); else sched_unbind(td); thread_unlock(td); break; case MSR_OP_RENDEZVOUS: smp_rendezvous(NULL, x86_msr_op_one, NULL, &a); break; } } /* * Machine dependent boot() routine * * I haven't seen anything to put here yet * Possibly some stuff might be grafted back here from boot() */ void cpu_boot(int howto) { } /* * Flush the D-cache for non-DMA I/O so that the I-cache can * be made coherent later. */ void cpu_flush_dcache(void *ptr, size_t len) { /* Not applicable */ } void acpi_cpu_c1(void) { __asm __volatile("sti; hlt"); } /* * Use mwait to pause execution while waiting for an interrupt or * another thread to signal that there is more work. * * NOTE: Interrupts will cause a wakeup; however, this function does * not enable interrupt handling. The caller is responsible to enable * interrupts. */ void acpi_cpu_idle_mwait(uint32_t mwait_hint) { int *state; uint64_t v; /* * A comment in Linux patch claims that 'CPUs run faster with * speculation protection disabled. All CPU threads in a core * must disable speculation protection for it to be * disabled. Disable it while we are idle so the other * hyperthread can run fast.' * * XXXKIB. Software coordination mode should be supported, * but all Intel CPUs provide hardware coordination. */ state = (int *)PCPU_PTR(monitorbuf); KASSERT(atomic_load_int(state) == STATE_SLEEPING, ("cpu_mwait_cx: wrong monitorbuf state")); atomic_store_int(state, STATE_MWAIT); if (PCPU_GET(ibpb_set) || hw_ssb_active) { v = rdmsr(MSR_IA32_SPEC_CTRL); wrmsr(MSR_IA32_SPEC_CTRL, v & ~(IA32_SPEC_CTRL_IBRS | IA32_SPEC_CTRL_STIBP | IA32_SPEC_CTRL_SSBD)); } else { v = 0; } cpu_monitor(state, 0, 0); if (atomic_load_int(state) == STATE_MWAIT) cpu_mwait(MWAIT_INTRBREAK, mwait_hint); /* * SSB cannot be disabled while we sleep, or rather, if it was * disabled, the sysctl thread will bind to our cpu to tweak * MSR. */ if (v != 0) wrmsr(MSR_IA32_SPEC_CTRL, v); /* * We should exit on any event that interrupts mwait, because * that event might be a wanted interrupt. */ atomic_store_int(state, STATE_RUNNING); } /* Get current clock frequency for the given cpu id. */ int cpu_est_clockrate(int cpu_id, uint64_t *rate) { uint64_t tsc1, tsc2; uint64_t acnt, mcnt, perf; register_t reg; if (pcpu_find(cpu_id) == NULL || rate == NULL) return (EINVAL); #ifdef __i386__ if ((cpu_feature & CPUID_TSC) == 0) return (EOPNOTSUPP); #endif /* * If TSC is P-state invariant and APERF/MPERF MSRs do not exist, * DELAY(9) based logic fails. */ if (tsc_is_invariant && !tsc_perf_stat) return (EOPNOTSUPP); #ifdef SMP if (smp_cpus > 1) { /* Schedule ourselves on the indicated cpu. */ thread_lock(curthread); sched_bind(curthread, cpu_id); thread_unlock(curthread); } #endif /* Calibrate by measuring a short delay. */ reg = intr_disable(); if (tsc_is_invariant) { wrmsr(MSR_MPERF, 0); wrmsr(MSR_APERF, 0); tsc1 = rdtsc(); DELAY(1000); mcnt = rdmsr(MSR_MPERF); acnt = rdmsr(MSR_APERF); tsc2 = rdtsc(); intr_restore(reg); perf = 1000 * acnt / mcnt; *rate = (tsc2 - tsc1) * perf; } else { tsc1 = rdtsc(); DELAY(1000); tsc2 = rdtsc(); intr_restore(reg); *rate = (tsc2 - tsc1) * 1000; } #ifdef SMP if (smp_cpus > 1) { thread_lock(curthread); sched_unbind(curthread); thread_unlock(curthread); } #endif return (0); } /* * Shutdown the CPU as much as possible */ void cpu_halt(void) { for (;;) halt(); } static void cpu_reset_real(void) { struct region_descriptor null_idt; int b; disable_intr(); #ifdef CPU_ELAN if (elan_mmcr != NULL) elan_mmcr->RESCFG = 1; #endif #ifdef __i386__ if (cpu == CPU_GEODE1100) { /* Attempt Geode's own reset */ outl(0xcf8, 0x80009044ul); outl(0xcfc, 0xf); } #endif #if !defined(BROKEN_KEYBOARD_RESET) /* * Attempt to do a CPU reset via the keyboard controller, * do not turn off GateA20, as any machine that fails * to do the reset here would then end up in no man's land. */ outb(IO_KBD + 4, 0xFE); DELAY(500000); /* wait 0.5 sec to see if that did it */ #endif /* * Attempt to force a reset via the Reset Control register at * I/O port 0xcf9. Bit 2 forces a system reset when it * transitions from 0 to 1. Bit 1 selects the type of reset * to attempt: 0 selects a "soft" reset, and 1 selects a * "hard" reset. We try a "hard" reset. The first write sets * bit 1 to select a "hard" reset and clears bit 2. The * second write forces a 0 -> 1 transition in bit 2 to trigger * a reset. */ outb(0xcf9, 0x2); outb(0xcf9, 0x6); DELAY(500000); /* wait 0.5 sec to see if that did it */ /* * Attempt to force a reset via the Fast A20 and Init register * at I/O port 0x92. Bit 1 serves as an alternate A20 gate. * Bit 0 asserts INIT# when set to 1. We are careful to only * preserve bit 1 while setting bit 0. We also must clear bit * 0 before setting it if it isn't already clear. */ b = inb(0x92); if (b != 0xff) { if ((b & 0x1) != 0) outb(0x92, b & 0xfe); outb(0x92, b | 0x1); DELAY(500000); /* wait 0.5 sec to see if that did it */ } printf("No known reset method worked, attempting CPU shutdown\n"); DELAY(1000000); /* wait 1 sec for printf to complete */ /* Wipe the IDT. */ null_idt.rd_limit = 0; null_idt.rd_base = 0; lidt(&null_idt); /* "good night, sweet prince .... " */ breakpoint(); /* NOTREACHED */ while(1); } #ifdef SMP static void cpu_reset_proxy(void) { cpu_reset_proxy_active = 1; while (cpu_reset_proxy_active == 1) ia32_pause(); /* Wait for other cpu to see that we've started */ printf("cpu_reset_proxy: Stopped CPU %d\n", cpu_reset_proxyid); DELAY(1000000); cpu_reset_real(); } #endif void cpu_reset(void) { #ifdef SMP cpuset_t map; u_int cnt; if (smp_started) { map = all_cpus; CPU_CLR(PCPU_GET(cpuid), &map); CPU_NAND(&map, &stopped_cpus); if (!CPU_EMPTY(&map)) { printf("cpu_reset: Stopping other CPUs\n"); stop_cpus(map); } if (PCPU_GET(cpuid) != 0) { cpu_reset_proxyid = PCPU_GET(cpuid); cpustop_restartfunc = cpu_reset_proxy; cpu_reset_proxy_active = 0; printf("cpu_reset: Restarting BSP\n"); /* Restart CPU #0. */ CPU_SETOF(0, &started_cpus); cnt = 0; while (cpu_reset_proxy_active == 0 && cnt < 10000000) { ia32_pause(); cnt++; /* Wait for BSP to announce restart */ } if (cpu_reset_proxy_active == 0) { printf("cpu_reset: Failed to restart BSP\n"); } else { cpu_reset_proxy_active = 2; while (1) ia32_pause(); /* NOTREACHED */ } } DELAY(1000000); } #endif cpu_reset_real(); /* NOTREACHED */ } bool cpu_mwait_usable(void) { return ((cpu_feature2 & CPUID2_MON) != 0 && ((cpu_mon_mwait_flags & (CPUID5_MON_MWAIT_EXT | CPUID5_MWAIT_INTRBREAK)) == (CPUID5_MON_MWAIT_EXT | CPUID5_MWAIT_INTRBREAK))); } void (*cpu_idle_hook)(sbintime_t) = NULL; /* ACPI idle hook. */ int cpu_amdc1e_bug = 0; /* AMD C1E APIC workaround required. */ static int idle_mwait = 1; /* Use MONITOR/MWAIT for short idle. */ SYSCTL_INT(_machdep, OID_AUTO, idle_mwait, CTLFLAG_RWTUN, &idle_mwait, 0, "Use MONITOR/MWAIT for short idle"); static void cpu_idle_acpi(sbintime_t sbt) { int *state; state = (int *)PCPU_PTR(monitorbuf); atomic_store_int(state, STATE_SLEEPING); /* See comments in cpu_idle_hlt(). */ disable_intr(); if (sched_runnable()) enable_intr(); else if (cpu_idle_hook) cpu_idle_hook(sbt); else acpi_cpu_c1(); atomic_store_int(state, STATE_RUNNING); } static void cpu_idle_hlt(sbintime_t sbt) { int *state; state = (int *)PCPU_PTR(monitorbuf); atomic_store_int(state, STATE_SLEEPING); /* * Since we may be in a critical section from cpu_idle(), if * an interrupt fires during that critical section we may have * a pending preemption. If the CPU halts, then that thread * may not execute until a later interrupt awakens the CPU. * To handle this race, check for a runnable thread after * disabling interrupts and immediately return if one is * found. Also, we must absolutely guarentee that hlt is * the next instruction after sti. This ensures that any * interrupt that fires after the call to disable_intr() will * immediately awaken the CPU from hlt. Finally, please note * that on x86 this works fine because of interrupts enabled only * after the instruction following sti takes place, while IF is set * to 1 immediately, allowing hlt instruction to acknowledge the * interrupt. */ disable_intr(); if (sched_runnable()) enable_intr(); else acpi_cpu_c1(); atomic_store_int(state, STATE_RUNNING); } static void cpu_idle_mwait(sbintime_t sbt) { int *state; state = (int *)PCPU_PTR(monitorbuf); atomic_store_int(state, STATE_MWAIT); /* See comments in cpu_idle_hlt(). */ disable_intr(); if (sched_runnable()) { atomic_store_int(state, STATE_RUNNING); enable_intr(); return; } cpu_monitor(state, 0, 0); if (atomic_load_int(state) == STATE_MWAIT) __asm __volatile("sti; mwait" : : "a" (MWAIT_C1), "c" (0)); else enable_intr(); atomic_store_int(state, STATE_RUNNING); } static void cpu_idle_spin(sbintime_t sbt) { int *state; int i; state = (int *)PCPU_PTR(monitorbuf); atomic_store_int(state, STATE_RUNNING); /* * The sched_runnable() call is racy but as long as there is * a loop missing it one time will have just a little impact if any * (and it is much better than missing the check at all). */ for (i = 0; i < 1000; i++) { if (sched_runnable()) return; cpu_spinwait(); } } void (*cpu_idle_fn)(sbintime_t) = cpu_idle_acpi; void cpu_idle(int busy) { uint64_t msr; sbintime_t sbt = -1; CTR2(KTR_SPARE2, "cpu_idle(%d) at %d", busy, curcpu); #ifdef MP_WATCHDOG ap_watchdog(PCPU_GET(cpuid)); #endif /* If we are busy - try to use fast methods. */ if (busy) { if ((cpu_feature2 & CPUID2_MON) && idle_mwait) { cpu_idle_mwait(busy); goto out; } } /* If we have time - switch timers into idle mode. */ if (!busy) { critical_enter(); sbt = cpu_idleclock(); } /* Apply AMD APIC timer C1E workaround. */ if (cpu_amdc1e_bug && cpu_disable_c3_sleep) { msr = rdmsr(MSR_AMDK8_IPM); if ((msr & (AMDK8_SMIONCMPHALT | AMDK8_C1EONCMPHALT)) != 0) wrmsr(MSR_AMDK8_IPM, msr & ~(AMDK8_SMIONCMPHALT | AMDK8_C1EONCMPHALT)); } /* Call main idle method. */ cpu_idle_fn(sbt); /* Switch timers back into active mode. */ if (!busy) { cpu_activeclock(); critical_exit(); } out: CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done", busy, curcpu); } static int cpu_idle_apl31_workaround; SYSCTL_INT(_machdep, OID_AUTO, idle_apl31, CTLFLAG_RW, &cpu_idle_apl31_workaround, 0, "Apollo Lake APL31 MWAIT bug workaround"); int cpu_idle_wakeup(int cpu) { int *state; state = (int *)pcpu_find(cpu)->pc_monitorbuf; switch (atomic_load_int(state)) { case STATE_SLEEPING: return (0); case STATE_MWAIT: atomic_store_int(state, STATE_RUNNING); return (cpu_idle_apl31_workaround ? 0 : 1); case STATE_RUNNING: return (1); default: panic("bad monitor state"); return (1); } } /* * Ordered by speed/power consumption. */ static struct { void *id_fn; char *id_name; int id_cpuid2_flag; } idle_tbl[] = { { .id_fn = cpu_idle_spin, .id_name = "spin" }, { .id_fn = cpu_idle_mwait, .id_name = "mwait", .id_cpuid2_flag = CPUID2_MON }, { .id_fn = cpu_idle_hlt, .id_name = "hlt" }, { .id_fn = cpu_idle_acpi, .id_name = "acpi" }, }; static int idle_sysctl_available(SYSCTL_HANDLER_ARGS) { char *avail, *p; int error; int i; avail = malloc(256, M_TEMP, M_WAITOK); p = avail; for (i = 0; i < nitems(idle_tbl); i++) { if (idle_tbl[i].id_cpuid2_flag != 0 && (cpu_feature2 & idle_tbl[i].id_cpuid2_flag) == 0) continue; if (strcmp(idle_tbl[i].id_name, "acpi") == 0 && cpu_idle_hook == NULL) continue; p += sprintf(p, "%s%s", p != avail ? ", " : "", idle_tbl[i].id_name); } error = sysctl_handle_string(oidp, avail, 0, req); free(avail, M_TEMP); return (error); } SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD, 0, 0, idle_sysctl_available, "A", "list of available idle functions"); static bool cpu_idle_selector(const char *new_idle_name) { int i; for (i = 0; i < nitems(idle_tbl); i++) { if (idle_tbl[i].id_cpuid2_flag != 0 && (cpu_feature2 & idle_tbl[i].id_cpuid2_flag) == 0) continue; if (strcmp(idle_tbl[i].id_name, "acpi") == 0 && cpu_idle_hook == NULL) continue; if (strcmp(idle_tbl[i].id_name, new_idle_name)) continue; cpu_idle_fn = idle_tbl[i].id_fn; if (bootverbose) printf("CPU idle set to %s\n", idle_tbl[i].id_name); return (true); } return (false); } static int cpu_idle_sysctl(SYSCTL_HANDLER_ARGS) { char buf[16], *p; int error, i; p = "unknown"; for (i = 0; i < nitems(idle_tbl); i++) { if (idle_tbl[i].id_fn == cpu_idle_fn) { p = idle_tbl[i].id_name; break; } } strncpy(buf, p, sizeof(buf)); error = sysctl_handle_string(oidp, buf, sizeof(buf), req); if (error != 0 || req->newptr == NULL) return (error); return (cpu_idle_selector(buf) ? 0 : EINVAL); } SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0, cpu_idle_sysctl, "A", "currently selected idle function"); static void cpu_idle_tun(void *unused __unused) { char tunvar[16]; if (TUNABLE_STR_FETCH("machdep.idle", tunvar, sizeof(tunvar))) cpu_idle_selector(tunvar); else if (cpu_vendor_id == CPU_VENDOR_AMD && CPUID_TO_FAMILY(cpu_id) == 0x17 && CPUID_TO_MODEL(cpu_id) == 0x1) { /* Ryzen erratas 1057, 1109. */ cpu_idle_selector("hlt"); idle_mwait = 0; } if (cpu_vendor_id == CPU_VENDOR_INTEL && cpu_id == 0x506c9) { /* * Apollo Lake errata APL31 (public errata APL30). * Stores to the armed address range may not trigger * MWAIT to resume execution. OS needs to use * interrupts to wake processors from MWAIT-induced * sleep states. */ cpu_idle_apl31_workaround = 1; } TUNABLE_INT_FETCH("machdep.idle_apl31", &cpu_idle_apl31_workaround); } SYSINIT(cpu_idle_tun, SI_SUB_CPU, SI_ORDER_MIDDLE, cpu_idle_tun, NULL); static int panic_on_nmi = 0xff; SYSCTL_INT(_machdep, OID_AUTO, panic_on_nmi, CTLFLAG_RWTUN, &panic_on_nmi, 0, "Panic on NMI: 1 = H/W failure; 2 = unknown; 0xff = all"); int nmi_is_broadcast = 1; SYSCTL_INT(_machdep, OID_AUTO, nmi_is_broadcast, CTLFLAG_RWTUN, &nmi_is_broadcast, 0, "Chipset NMI is broadcast"); int (*apei_nmi)(void); void nmi_call_kdb(u_int cpu, u_int type, struct trapframe *frame) { bool claimed = false; #ifdef DEV_ISA /* machine/parity/power fail/"kitchen sink" faults */ if (isa_nmi(frame->tf_err)) { claimed = true; if ((panic_on_nmi & 1) != 0) panic("NMI indicates hardware failure"); } #endif /* DEV_ISA */ /* ACPI Platform Error Interfaces callback. */ if (apei_nmi != NULL && (*apei_nmi)()) claimed = true; /* * NMIs can be useful for debugging. They can be hooked up to a * pushbutton, usually on an ISA, PCI, or PCIe card. They can also be * generated by an IPMI BMC, either manually or in response to a * watchdog timeout. For example, see the "power diag" command in * ports/sysutils/ipmitool. They can also be generated by a * hypervisor; see "bhyvectl --inject-nmi". */ #ifdef KDB if (!claimed && (panic_on_nmi & 2) != 0) { if (debugger_on_panic) { printf("NMI/cpu%d ... going to debugger\n", cpu); claimed = kdb_trap(type, 0, frame); } } #endif /* KDB */ if (!claimed && panic_on_nmi != 0) panic("NMI"); } void nmi_handle_intr(u_int type, struct trapframe *frame) { #ifdef SMP if (nmi_is_broadcast) { nmi_call_kdb_smp(type, frame); return; } #endif nmi_call_kdb(PCPU_GET(cpuid), type, frame); } static int hw_ibrs_active; int hw_ibrs_ibpb_active; int hw_ibrs_disable = 1; SYSCTL_INT(_hw, OID_AUTO, ibrs_active, CTLFLAG_RD, &hw_ibrs_active, 0, "Indirect Branch Restricted Speculation active"); void hw_ibrs_recalculate(bool for_all_cpus) { if ((cpu_ia32_arch_caps & IA32_ARCH_CAP_IBRS_ALL) != 0) { x86_msr_op(MSR_IA32_SPEC_CTRL, (for_all_cpus ? MSR_OP_RENDEZVOUS : MSR_OP_LOCAL) | (hw_ibrs_disable != 0 ? MSR_OP_ANDNOT : MSR_OP_OR), IA32_SPEC_CTRL_IBRS); hw_ibrs_active = hw_ibrs_disable == 0; hw_ibrs_ibpb_active = 0; } else { hw_ibrs_active = hw_ibrs_ibpb_active = (cpu_stdext_feature3 & CPUID_STDEXT3_IBPB) != 0 && !hw_ibrs_disable; } } static int hw_ibrs_disable_handler(SYSCTL_HANDLER_ARGS) { int error, val; val = hw_ibrs_disable; error = sysctl_handle_int(oidp, &val, 0, req); if (error != 0 || req->newptr == NULL) return (error); hw_ibrs_disable = val != 0; hw_ibrs_recalculate(true); return (0); } SYSCTL_PROC(_hw, OID_AUTO, ibrs_disable, CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE, NULL, 0, hw_ibrs_disable_handler, "I", "Disable Indirect Branch Restricted Speculation"); int hw_ssb_active; int hw_ssb_disable; SYSCTL_INT(_hw, OID_AUTO, spec_store_bypass_disable_active, CTLFLAG_RD, &hw_ssb_active, 0, "Speculative Store Bypass Disable active"); static void hw_ssb_set(bool enable, bool for_all_cpus) { if ((cpu_stdext_feature3 & CPUID_STDEXT3_SSBD) == 0) { hw_ssb_active = 0; return; } hw_ssb_active = enable; x86_msr_op(MSR_IA32_SPEC_CTRL, (enable ? MSR_OP_OR : MSR_OP_ANDNOT) | (for_all_cpus ? MSR_OP_SCHED : MSR_OP_LOCAL), IA32_SPEC_CTRL_SSBD); } void hw_ssb_recalculate(bool all_cpus) { switch (hw_ssb_disable) { default: hw_ssb_disable = 0; /* FALLTHROUGH */ case 0: /* off */ hw_ssb_set(false, all_cpus); break; case 1: /* on */ hw_ssb_set(true, all_cpus); break; case 2: /* auto */ hw_ssb_set((cpu_ia32_arch_caps & IA32_ARCH_CAP_SSB_NO) != 0 ? false : true, all_cpus); break; } } static int hw_ssb_disable_handler(SYSCTL_HANDLER_ARGS) { int error, val; val = hw_ssb_disable; error = sysctl_handle_int(oidp, &val, 0, req); if (error != 0 || req->newptr == NULL) return (error); hw_ssb_disable = val; hw_ssb_recalculate(true); return (0); } SYSCTL_PROC(_hw, OID_AUTO, spec_store_bypass_disable, CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE, NULL, 0, hw_ssb_disable_handler, "I", "Speculative Store Bypass Disable (0 - off, 1 - on, 2 - auto"); int hw_mds_disable; /* * Handler for Microarchitectural Data Sampling issues. Really not a * pointer to C function: on amd64 the code must not change any CPU * architectural state except possibly %rflags. Also, it is always * called with interrupts disabled. */ void mds_handler_void(void); void mds_handler_verw(void); void mds_handler_ivb(void); void mds_handler_bdw(void); void mds_handler_skl_sse(void); void mds_handler_skl_avx(void); void mds_handler_skl_avx512(void); void mds_handler_silvermont(void); void (*mds_handler)(void) = mds_handler_void; static int sysctl_hw_mds_disable_state_handler(SYSCTL_HANDLER_ARGS) { const char *state; if (mds_handler == mds_handler_void) state = "inactive"; else if (mds_handler == mds_handler_verw) state = "VERW"; else if (mds_handler == mds_handler_ivb) state = "software IvyBridge"; else if (mds_handler == mds_handler_bdw) state = "software Broadwell"; else if (mds_handler == mds_handler_skl_sse) state = "software Skylake SSE"; else if (mds_handler == mds_handler_skl_avx) state = "software Skylake AVX"; else if (mds_handler == mds_handler_skl_avx512) state = "software Skylake AVX512"; else if (mds_handler == mds_handler_silvermont) state = "software Silvermont"; else state = "unknown"; return (SYSCTL_OUT(req, state, strlen(state))); } SYSCTL_PROC(_hw, OID_AUTO, mds_disable_state, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_hw_mds_disable_state_handler, "A", "Microarchitectural Data Sampling Mitigation state"); _Static_assert(__offsetof(struct pcpu, pc_mds_tmp) % 64 == 0, "MDS AVX512"); void hw_mds_recalculate(void) { struct pcpu *pc; vm_offset_t b64; u_long xcr0; int i; /* * Allow user to force VERW variant even if MD_CLEAR is not * reported. For instance, hypervisor might unknowingly * filter the cap out. * For the similar reasons, and for testing, allow to enable * mitigation even when MDS_NO cap is set. */ if (cpu_vendor_id != CPU_VENDOR_INTEL || hw_mds_disable == 0 || ((cpu_ia32_arch_caps & IA32_ARCH_CAP_MDS_NO) != 0 && hw_mds_disable == 3)) { mds_handler = mds_handler_void; } else if (((cpu_stdext_feature3 & CPUID_STDEXT3_MD_CLEAR) != 0 && hw_mds_disable == 3) || hw_mds_disable == 1) { mds_handler = mds_handler_verw; } else if (CPUID_TO_FAMILY(cpu_id) == 0x6 && (CPUID_TO_MODEL(cpu_id) == 0x2e || CPUID_TO_MODEL(cpu_id) == 0x1e || CPUID_TO_MODEL(cpu_id) == 0x1f || CPUID_TO_MODEL(cpu_id) == 0x1a || CPUID_TO_MODEL(cpu_id) == 0x2f || CPUID_TO_MODEL(cpu_id) == 0x25 || CPUID_TO_MODEL(cpu_id) == 0x2c || CPUID_TO_MODEL(cpu_id) == 0x2d || CPUID_TO_MODEL(cpu_id) == 0x2a || CPUID_TO_MODEL(cpu_id) == 0x3e || CPUID_TO_MODEL(cpu_id) == 0x3a) && (hw_mds_disable == 2 || hw_mds_disable == 3)) { /* * Nehalem, SandyBridge, IvyBridge */ CPU_FOREACH(i) { pc = pcpu_find(i); if (pc->pc_mds_buf == NULL) { pc->pc_mds_buf = malloc_domainset(672, M_TEMP, DOMAINSET_PREF(pc->pc_domain), M_WAITOK); bzero(pc->pc_mds_buf, 16); } } mds_handler = mds_handler_ivb; } else if (CPUID_TO_FAMILY(cpu_id) == 0x6 && (CPUID_TO_MODEL(cpu_id) == 0x3f || CPUID_TO_MODEL(cpu_id) == 0x3c || CPUID_TO_MODEL(cpu_id) == 0x45 || CPUID_TO_MODEL(cpu_id) == 0x46 || CPUID_TO_MODEL(cpu_id) == 0x56 || CPUID_TO_MODEL(cpu_id) == 0x4f || CPUID_TO_MODEL(cpu_id) == 0x47 || CPUID_TO_MODEL(cpu_id) == 0x3d) && (hw_mds_disable == 2 || hw_mds_disable == 3)) { /* * Haswell, Broadwell */ CPU_FOREACH(i) { pc = pcpu_find(i); if (pc->pc_mds_buf == NULL) { pc->pc_mds_buf = malloc_domainset(1536, M_TEMP, DOMAINSET_PREF(pc->pc_domain), M_WAITOK); bzero(pc->pc_mds_buf, 16); } } mds_handler = mds_handler_bdw; } else if (CPUID_TO_FAMILY(cpu_id) == 0x6 && ((CPUID_TO_MODEL(cpu_id) == 0x55 && (cpu_id & CPUID_STEPPING) <= 5) || CPUID_TO_MODEL(cpu_id) == 0x4e || CPUID_TO_MODEL(cpu_id) == 0x5e || (CPUID_TO_MODEL(cpu_id) == 0x8e && (cpu_id & CPUID_STEPPING) <= 0xb) || (CPUID_TO_MODEL(cpu_id) == 0x9e && (cpu_id & CPUID_STEPPING) <= 0xc)) && (hw_mds_disable == 2 || hw_mds_disable == 3)) { /* * Skylake, KabyLake, CoffeeLake, WhiskeyLake, * CascadeLake */ CPU_FOREACH(i) { pc = pcpu_find(i); if (pc->pc_mds_buf == NULL) { pc->pc_mds_buf = malloc_domainset(6 * 1024, M_TEMP, DOMAINSET_PREF(pc->pc_domain), M_WAITOK); b64 = (vm_offset_t)malloc_domainset(64 + 63, M_TEMP, DOMAINSET_PREF(pc->pc_domain), M_WAITOK); pc->pc_mds_buf64 = (void *)roundup2(b64, 64); bzero(pc->pc_mds_buf64, 64); } } xcr0 = rxcr(0); if ((xcr0 & XFEATURE_ENABLED_ZMM_HI256) != 0 && (cpu_stdext_feature2 & CPUID_STDEXT_AVX512DQ) != 0) mds_handler = mds_handler_skl_avx512; else if ((xcr0 & XFEATURE_ENABLED_AVX) != 0 && (cpu_feature2 & CPUID2_AVX) != 0) mds_handler = mds_handler_skl_avx; else mds_handler = mds_handler_skl_sse; } else if (CPUID_TO_FAMILY(cpu_id) == 0x6 && ((CPUID_TO_MODEL(cpu_id) == 0x37 || CPUID_TO_MODEL(cpu_id) == 0x4a || CPUID_TO_MODEL(cpu_id) == 0x4c || CPUID_TO_MODEL(cpu_id) == 0x4d || CPUID_TO_MODEL(cpu_id) == 0x5a || CPUID_TO_MODEL(cpu_id) == 0x5d || CPUID_TO_MODEL(cpu_id) == 0x6e || CPUID_TO_MODEL(cpu_id) == 0x65 || CPUID_TO_MODEL(cpu_id) == 0x75 || CPUID_TO_MODEL(cpu_id) == 0x1c || CPUID_TO_MODEL(cpu_id) == 0x26 || CPUID_TO_MODEL(cpu_id) == 0x27 || CPUID_TO_MODEL(cpu_id) == 0x35 || CPUID_TO_MODEL(cpu_id) == 0x36 || CPUID_TO_MODEL(cpu_id) == 0x7a))) { /* Silvermont, Airmont */ CPU_FOREACH(i) { pc = pcpu_find(i); if (pc->pc_mds_buf == NULL) pc->pc_mds_buf = malloc(256, M_TEMP, M_WAITOK); } mds_handler = mds_handler_silvermont; } else { hw_mds_disable = 0; mds_handler = mds_handler_void; } } static void hw_mds_recalculate_boot(void *arg __unused) { hw_mds_recalculate(); } SYSINIT(mds_recalc, SI_SUB_SMP, SI_ORDER_ANY, hw_mds_recalculate_boot, NULL); static int sysctl_mds_disable_handler(SYSCTL_HANDLER_ARGS) { int error, val; val = hw_mds_disable; error = sysctl_handle_int(oidp, &val, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (val < 0 || val > 3) return (EINVAL); hw_mds_disable = val; hw_mds_recalculate(); return (0); } SYSCTL_PROC(_hw, OID_AUTO, mds_disable, CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE, NULL, 0, sysctl_mds_disable_handler, "I", "Microarchitectural Data Sampling Mitigation " "(0 - off, 1 - on VERW, 2 - on SW, 3 - on AUTO"); /* * Intel Transactional Memory Asynchronous Abort Mitigation * CVE-2019-11135 */ int x86_taa_enable; int x86_taa_state; enum { TAA_NONE = 0, /* No mitigation enabled */ TAA_TSX_DISABLE = 1, /* Disable TSX via MSR */ TAA_VERW = 2, /* Use VERW mitigation */ TAA_AUTO = 3, /* Automatically select the mitigation */ /* The states below are not selectable by the operator */ TAA_TAA_UC = 4, /* Mitigation present in microcode */ TAA_NOT_PRESENT = 5 /* TSX is not present */ }; static void taa_set(bool enable, bool all) { x86_msr_op(MSR_IA32_TSX_CTRL, (enable ? MSR_OP_OR : MSR_OP_ANDNOT) | (all ? MSR_OP_RENDEZVOUS : MSR_OP_LOCAL), IA32_TSX_CTRL_RTM_DISABLE | IA32_TSX_CTRL_TSX_CPUID_CLEAR); } void x86_taa_recalculate(void) { static int taa_saved_mds_disable = 0; int taa_need = 0, taa_state = 0; int mds_disable = 0, need_mds_recalc = 0; /* Check CPUID.07h.EBX.HLE and RTM for the presence of TSX */ if ((cpu_stdext_feature & CPUID_STDEXT_HLE) == 0 || (cpu_stdext_feature & CPUID_STDEXT_RTM) == 0) { /* TSX is not present */ x86_taa_state = TAA_NOT_PRESENT; return; } /* Check to see what mitigation options the CPU gives us */ if (cpu_ia32_arch_caps & IA32_ARCH_CAP_TAA_NO) { /* CPU is not suseptible to TAA */ taa_need = TAA_TAA_UC; } else if (cpu_ia32_arch_caps & IA32_ARCH_CAP_TSX_CTRL) { /* * CPU can turn off TSX. This is the next best option * if TAA_NO hardware mitigation isn't present */ taa_need = TAA_TSX_DISABLE; } else { /* No TSX/TAA specific remedies are available. */ if (x86_taa_enable == TAA_TSX_DISABLE) { if (bootverbose) printf("TSX control not available\n"); return; } else taa_need = TAA_VERW; } /* Can we automatically take action, or are we being forced? */ if (x86_taa_enable == TAA_AUTO) taa_state = taa_need; else taa_state = x86_taa_enable; /* No state change, nothing to do */ if (taa_state == x86_taa_state) { if (bootverbose) printf("No TSX change made\n"); return; } /* Does the MSR need to be turned on or off? */ if (taa_state == TAA_TSX_DISABLE) taa_set(true, true); else if (x86_taa_state == TAA_TSX_DISABLE) taa_set(false, true); /* Does MDS need to be set to turn on VERW? */ if (taa_state == TAA_VERW) { taa_saved_mds_disable = hw_mds_disable; mds_disable = hw_mds_disable = 1; need_mds_recalc = 1; } else if (x86_taa_state == TAA_VERW) { mds_disable = hw_mds_disable = taa_saved_mds_disable; need_mds_recalc = 1; } if (need_mds_recalc) { hw_mds_recalculate(); if (mds_disable != hw_mds_disable) { if (bootverbose) printf("Cannot change MDS state for TAA\n"); /* Don't update our state */ return; } } x86_taa_state = taa_state; return; } static void taa_recalculate_boot(void * arg __unused) { x86_taa_recalculate(); } SYSINIT(taa_recalc, SI_SUB_SMP, SI_ORDER_ANY, taa_recalculate_boot, NULL); SYSCTL_NODE(_machdep_mitigations, OID_AUTO, taa, CTLFLAG_RW, 0, "TSX Asynchronous Abort Mitigation"); static int sysctl_taa_handler(SYSCTL_HANDLER_ARGS) { int error, val; val = x86_taa_enable; error = sysctl_handle_int(oidp, &val, 0, req); if (error != 0 || req->newptr == NULL) return (error); if (val < TAA_NONE || val > TAA_AUTO) return (EINVAL); x86_taa_enable = val; x86_taa_recalculate(); return (0); } SYSCTL_PROC(_machdep_mitigations_taa, OID_AUTO, enable, CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE, NULL, 0, sysctl_taa_handler, "I", "TAA Mitigation enablement control " "(0 - off, 1 - disable TSX, 2 - VERW, 3 - on AUTO"); static int sysctl_taa_state_handler(SYSCTL_HANDLER_ARGS) { const char *state; switch (x86_taa_state) { case TAA_NONE: state = "inactive"; break; case TAA_TSX_DISABLE: state = "TSX disabled"; break; case TAA_VERW: state = "VERW"; break; case TAA_TAA_UC: state = "Mitigated in microcode"; break; case TAA_NOT_PRESENT: state = "TSX not present"; break; default: state = "unknown"; } return (SYSCTL_OUT(req, state, strlen(state))); } SYSCTL_PROC(_machdep_mitigations_taa, OID_AUTO, state, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_taa_state_handler, "A", "TAA Mitigation state"); int __read_frequently cpu_flush_rsb_ctxsw; SYSCTL_INT(_machdep_mitigations, OID_AUTO, flush_rsb_ctxsw, CTLFLAG_RW | CTLFLAG_NOFETCH, &cpu_flush_rsb_ctxsw, 0, "Flush Return Stack Buffer on context switch"); SYSCTL_NODE(_machdep_mitigations, OID_AUTO, rngds, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, "MCU Optimization, disable RDSEED mitigation"); int x86_rngds_mitg_enable = 1; void x86_rngds_mitg_recalculate(bool all_cpus) { if ((cpu_stdext_feature3 & CPUID_STDEXT3_MCUOPT) == 0) return; x86_msr_op(MSR_IA32_MCU_OPT_CTRL, (x86_rngds_mitg_enable ? MSR_OP_OR : MSR_OP_ANDNOT) | (all_cpus ? MSR_OP_RENDEZVOUS : MSR_OP_LOCAL), IA32_RNGDS_MITG_DIS); } static int sysctl_rngds_mitg_enable_handler(SYSCTL_HANDLER_ARGS) { int error, val; val = x86_rngds_mitg_enable; error = sysctl_handle_int(oidp, &val, 0, req); if (error != 0 || req->newptr == NULL) return (error); x86_rngds_mitg_enable = val; x86_rngds_mitg_recalculate(true); return (0); } SYSCTL_PROC(_machdep_mitigations_rngds, OID_AUTO, enable, CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE, NULL, 0, sysctl_rngds_mitg_enable_handler, "I", "MCU Optimization, disabling RDSEED mitigation control " "(0 - mitigation disabled (RDSEED optimized), 1 - mitigation enabled"); static int sysctl_rngds_state_handler(SYSCTL_HANDLER_ARGS) { const char *state; if ((cpu_stdext_feature3 & CPUID_STDEXT3_MCUOPT) == 0) { state = "Not applicable"; } else if (x86_rngds_mitg_enable == 0) { state = "RDSEED not serialized"; } else { state = "Mitigated"; } return (SYSCTL_OUT(req, state, strlen(state))); } SYSCTL_PROC(_machdep_mitigations_rngds, OID_AUTO, state, CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_rngds_state_handler, "A", "MCU Optimization state"); /* * Enable and restore kernel text write permissions. * Callers must ensure that disable_wp()/restore_wp() are executed * without rescheduling on the same core. */ bool disable_wp(void) { u_int cr0; cr0 = rcr0(); if ((cr0 & CR0_WP) == 0) return (false); load_cr0(cr0 & ~CR0_WP); return (true); } void restore_wp(bool old_wp) { if (old_wp) load_cr0(rcr0() | CR0_WP); } bool acpi_get_fadt_bootflags(uint16_t *flagsp) { #ifdef DEV_ACPI ACPI_TABLE_FADT *fadt; vm_paddr_t physaddr; physaddr = acpi_find_table(ACPI_SIG_FADT); if (physaddr == 0) return (false); fadt = acpi_map_table(physaddr, ACPI_SIG_FADT); if (fadt == NULL) return (false); *flagsp = fadt->BootFlags; acpi_unmap_table(fadt); return (true); #else return (false); #endif } + +DEFINE_IFUNC(, uint64_t, rdtsc_ordered, (void), static) +{ + bool cpu_is_amd = cpu_vendor_id == CPU_VENDOR_AMD || + cpu_vendor_id == CPU_VENDOR_HYGON; + + if ((amd_feature & AMDID_RDTSCP) != 0) + return (rdtscp); + else if ((cpu_feature & CPUID_SSE2) != 0) + return (cpu_is_amd ? rdtsc_ordered_mfence : + rdtsc_ordered_lfence); + else + return (rdtsc); +}