Index: head/emulators/xen-kernel/Makefile =================================================================== --- head/emulators/xen-kernel/Makefile +++ head/emulators/xen-kernel/Makefile @@ -2,7 +2,7 @@ PORTNAME= xen PORTVERSION= 4.12.0 -PORTREVISION= 0 +PORTREVISION= 1 CATEGORIES= emulators MASTER_SITES= http://downloads.xenproject.org/release/xen/${PORTVERSION}/ PKGNAMESUFFIX= -kernel @@ -23,6 +23,20 @@ STRIP= # PLIST_FILES= /boot/xen \ lib/debug/boot/xen.debug + +# Pre-patches for XSA-297 to apply cleanly +EXTRA_PATCHES+= ${PATCHDIR}/0001-x86-tsx-Implement-controls-for-RTM-force-abort-mode.patch:-p1 \ + ${PATCHDIR}/0001-x86-msr-Shorten-ARCH_CAPABILITIES_-constants.patch:-p1 \ + ${PATCHDIR}/0002-x86-spec-ctrl-Extend-repoline-safey-calcuations-for-.patch:-p1 + +# Fixes for XSA-297 (MDS) +EXTRA_PATCHES+= ${PATCHDIR}/xsa297-4.12-1.patch:-p1 \ + ${PATCHDIR}/xsa297-4.12-2.patch:-p1 \ + ${PATCHDIR}/xsa297-4.12-3.patch:-p1 \ + ${PATCHDIR}/xsa297-4.12-4.patch:-p1 \ + ${PATCHDIR}/xsa297-4.12-5.patch:-p1 \ + ${PATCHDIR}/xsa297-4.12-6.patch:-p1 \ + ${PATCHDIR}/xsa297-4.12-7.patch:-p1 .include Index: head/emulators/xen-kernel/files/0001-x86-msr-Shorten-ARCH_CAPABILITIES_-constants.patch =================================================================== --- head/emulators/xen-kernel/files/0001-x86-msr-Shorten-ARCH_CAPABILITIES_-constants.patch +++ head/emulators/xen-kernel/files/0001-x86-msr-Shorten-ARCH_CAPABILITIES_-constants.patch @@ -0,0 +1,71 @@ +From e25d1338e1d8a32e46c808321323c4ad8fc5ba01 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Fri, 3 May 2019 10:44:58 +0200 +Subject: [PATCH 1/2] x86/msr: Shorten ARCH_CAPABILITIES_* constants + +They are unnecesserily verbose, and ARCH_CAPS_* is already the more common +version. + +Signed-off-by: Andrew Cooper +Acked-by: Jan Beulich +master commit: ba27aaa88548c824a47dcf5609288ee1c05d2946 +master date: 2019-03-18 16:26:40 +0000 +--- + xen/arch/x86/spec_ctrl.c | 10 +++++----- + xen/include/asm-x86/msr-index.h | 4 ++-- + 2 files changed, 7 insertions(+), 7 deletions(-) + +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index ad72ecd3a5..22bfc5a5e8 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -230,8 +230,8 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) + (_7d0 & cpufeat_mask(X86_FEATURE_L1D_FLUSH)) ? " L1D_FLUSH" : "", + (_7d0 & cpufeat_mask(X86_FEATURE_SSBD)) ? " SSBD" : "", + (e8b & cpufeat_mask(X86_FEATURE_IBPB)) ? " IBPB" : "", +- (caps & ARCH_CAPABILITIES_IBRS_ALL) ? " IBRS_ALL" : "", +- (caps & ARCH_CAPABILITIES_RDCL_NO) ? " RDCL_NO" : "", ++ (caps & ARCH_CAPS_IBRS_ALL) ? " IBRS_ALL" : "", ++ (caps & ARCH_CAPS_RDCL_NO) ? " RDCL_NO" : "", + (caps & ARCH_CAPS_RSBA) ? " RSBA" : "", + (caps & ARCH_CAPS_SKIP_L1DFL) ? " SKIP_L1DFL": "", + (caps & ARCH_CAPS_SSB_NO) ? " SSB_NO" : ""); +@@ -549,7 +549,7 @@ static __init void l1tf_calculations(uint64_t caps) + } + + /* Any processor advertising RDCL_NO should be not vulnerable to L1TF. 
*/ +- if ( caps & ARCH_CAPABILITIES_RDCL_NO ) ++ if ( caps & ARCH_CAPS_RDCL_NO ) + cpu_has_bug_l1tf = false; + + if ( cpu_has_bug_l1tf && hit_default ) +@@ -613,9 +613,9 @@ int8_t __read_mostly opt_xpti_domu = -1; + static __init void xpti_init_default(uint64_t caps) + { + if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD ) +- caps = ARCH_CAPABILITIES_RDCL_NO; ++ caps = ARCH_CAPS_RDCL_NO; + +- if ( caps & ARCH_CAPABILITIES_RDCL_NO ) ++ if ( caps & ARCH_CAPS_RDCL_NO ) + { + if ( opt_xpti_hwdom < 0 ) + opt_xpti_hwdom = 0; +diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h +index c6e1d8768f..11512d4250 100644 +--- a/xen/include/asm-x86/msr-index.h ++++ b/xen/include/asm-x86/msr-index.h +@@ -42,8 +42,8 @@ + #define PRED_CMD_IBPB (_AC(1, ULL) << 0) + + #define MSR_ARCH_CAPABILITIES 0x0000010a +-#define ARCH_CAPABILITIES_RDCL_NO (_AC(1, ULL) << 0) +-#define ARCH_CAPABILITIES_IBRS_ALL (_AC(1, ULL) << 1) ++#define ARCH_CAPS_RDCL_NO (_AC(1, ULL) << 0) ++#define ARCH_CAPS_IBRS_ALL (_AC(1, ULL) << 1) + #define ARCH_CAPS_RSBA (_AC(1, ULL) << 2) + #define ARCH_CAPS_SKIP_L1DFL (_AC(1, ULL) << 3) + #define ARCH_CAPS_SSB_NO (_AC(1, ULL) << 4) +-- +2.17.2 (Apple Git-113) + Index: head/emulators/xen-kernel/files/0001-x86-tsx-Implement-controls-for-RTM-force-abort-mode.patch =================================================================== --- head/emulators/xen-kernel/files/0001-x86-tsx-Implement-controls-for-RTM-force-abort-mode.patch +++ head/emulators/xen-kernel/files/0001-x86-tsx-Implement-controls-for-RTM-force-abort-mode.patch @@ -0,0 +1,185 @@ +From e3a1ebee98e61dfe3b8b1eb71043ad5220b3cfd3 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Fri, 3 May 2019 10:39:29 +0200 +Subject: [PATCH] x86/tsx: Implement controls for RTM force-abort mode + +The CPUID bit and MSR are deliberately not exposed to guests, because they +won't exist on newer processors. As vPMU isn't security supported, the +misbehaviour of PCR3 isn't expected to impact production deployments. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +master commit: 6be613f29b4205349275d24367bd4c82fb2960dd +master date: 2019-03-12 17:05:21 +0000 +--- + docs/misc/xen-command-line.pandoc | 17 ++++++++++++++++- + tools/misc/xen-cpuid.c | 2 ++ + xen/arch/x86/cpu/intel.c | 3 +++ + xen/arch/x86/cpu/vpmu.c | 3 +++ + xen/arch/x86/msr.c | 4 ++++ + xen/include/asm-x86/cpufeature.h | 3 +++ + xen/include/asm-x86/msr-index.h | 3 +++ + xen/include/asm-x86/vpmu.h | 1 + + xen/include/public/arch-x86/cpufeatureset.h | 1 + + 9 files changed, 36 insertions(+), 1 deletion(-) + +diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc +index 742555616d..6db82f302e 100644 +--- a/docs/misc/xen-command-line.pandoc ++++ b/docs/misc/xen-command-line.pandoc +@@ -2109,7 +2109,7 @@ Use Virtual Processor ID support if available. This prevents the need for TLB + flushes on VM entry and exit, increasing performance. + + ### vpmu (x86) +- = List of [ , bts, ipc, arch ] ++ = List of [ , bts, ipc, arch, rtm-abort= ] + + Applicability: x86. Default: false + +@@ -2142,6 +2142,21 @@ provide access to a wealth of low level processor information. + + * The `arch` option allows access to the pre-defined architectural events. + ++* The `rtm-abort` boolean controls a trade-off between working Restricted ++ Transactional Memory, and working performance counters. ++ ++ All processors released to date (Q1 2019) supporting Transactional Memory ++ Extensions suffer an erratum which has been addressed in microcode. 
++ ++ Processors based on the Skylake microarchitecture with up-to-date ++ microcode internally use performance counter 3 to work around the erratum. ++ A consequence is that the counter gets reprogrammed whenever an `XBEGIN` ++ instruction is executed. ++ ++ An alternative mode exists where PCR3 behaves as before, at the cost of ++ `XBEGIN` unconditionally aborting. Enabling `rtm-abort` mode will ++ activate this alternative mode. ++ + *Warning:* + As the virtualisation is not 100% safe, don't use the vpmu flag on + production systems (see http://xenbits.xen.org/xsa/advisory-163.html)! +diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c +index 6e7ca8b9a4..d87a72e3e6 100644 +--- a/tools/misc/xen-cpuid.c ++++ b/tools/misc/xen-cpuid.c +@@ -146,6 +146,8 @@ static const char *str_7d0[32] = + { + [ 2] = "avx512_4vnniw", [ 3] = "avx512_4fmaps", + ++ /* 12 */ [13] = "tsx-force-abort", ++ + [26] = "ibrsb", [27] = "stibp", + [28] = "l1d_flush", [29] = "arch_caps", + /* 30 */ [31] = "ssbd", +diff --git a/xen/arch/x86/cpu/intel.c b/xen/arch/x86/cpu/intel.c +index 65fa3d611f..29c6b87512 100644 +--- a/xen/arch/x86/cpu/intel.c ++++ b/xen/arch/x86/cpu/intel.c +@@ -286,6 +286,9 @@ static void Intel_errata_workarounds(struct cpuinfo_x86 *c) + if (c->x86 == 6 && cpu_has_clflush && + (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47)) + __set_bit(X86_FEATURE_CLFLUSH_MONITOR, c->x86_capability); ++ ++ if (cpu_has_tsx_force_abort && opt_rtm_abort) ++ wrmsrl(MSR_TSX_FORCE_ABORT, TSX_FORCE_ABORT_RTM); + } + + +diff --git a/xen/arch/x86/cpu/vpmu.c b/xen/arch/x86/cpu/vpmu.c +index 8324d62f11..8f6daf13fd 100644 +--- a/xen/arch/x86/cpu/vpmu.c ++++ b/xen/arch/x86/cpu/vpmu.c +@@ -45,6 +45,7 @@ CHECK_pmu_params; + static unsigned int __read_mostly opt_vpmu_enabled; + unsigned int __read_mostly vpmu_mode = XENPMU_MODE_OFF; + unsigned int __read_mostly vpmu_features = 0; ++bool __read_mostly opt_rtm_abort; + + static DEFINE_SPINLOCK(vpmu_lock); + static unsigned vpmu_count; +@@ -73,6 +74,8 @@ static int __init parse_vpmu_params(const char *s) + vpmu_features |= XENPMU_FEATURE_IPC_ONLY; + else if ( !cmdline_strcmp(s, "arch") ) + vpmu_features |= XENPMU_FEATURE_ARCH_ONLY; ++ else if ( (val = parse_boolean("rtm-abort", s, ss)) >= 0 ) ++ opt_rtm_abort = val; + else + rc = -EINVAL; + +diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c +index 9bb38b6d66..4df4a59f4d 100644 +--- a/xen/arch/x86/msr.c ++++ b/xen/arch/x86/msr.c +@@ -131,6 +131,8 @@ int guest_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val) + case MSR_PRED_CMD: + case MSR_FLUSH_CMD: + /* Write-only */ ++ case MSR_TSX_FORCE_ABORT: ++ /* Not offered to guests. */ + goto gp_fault; + + case MSR_SPEC_CTRL: +@@ -230,6 +232,8 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val) + case MSR_INTEL_PLATFORM_INFO: + case MSR_ARCH_CAPABILITIES: + /* Read-only */ ++ case MSR_TSX_FORCE_ABORT: ++ /* Not offered to guests. */ + goto gp_fault; + + case MSR_AMD_PATCHLOADER: +diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h +index 1fb9af4b19..745801f3c0 100644 +--- a/xen/include/asm-x86/cpufeature.h ++++ b/xen/include/asm-x86/cpufeature.h +@@ -112,6 +112,9 @@ + /* CPUID level 0x80000007.edx */ + #define cpu_has_itsc boot_cpu_has(X86_FEATURE_ITSC) + ++/* CPUID level 0x00000007:0.edx */ ++#define cpu_has_tsx_force_abort boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT) ++ + /* Synthesized. 
*/ + #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) + #define cpu_has_cpuid_faulting boot_cpu_has(X86_FEATURE_CPUID_FAULTING) +diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h +index 24d783a72d..c6e1d8768f 100644 +--- a/xen/include/asm-x86/msr-index.h ++++ b/xen/include/asm-x86/msr-index.h +@@ -51,6 +51,9 @@ + #define MSR_FLUSH_CMD 0x0000010b + #define FLUSH_CMD_L1D (_AC(1, ULL) << 0) + ++#define MSR_TSX_FORCE_ABORT 0x0000010f ++#define TSX_FORCE_ABORT_RTM (_AC(1, ULL) << 0) ++ + /* Intel MSRs. Some also available on other CPUs */ + #define MSR_IA32_PERFCTR0 0x000000c1 + #define MSR_IA32_A_PERFCTR0 0x000004c1 +diff --git a/xen/include/asm-x86/vpmu.h b/xen/include/asm-x86/vpmu.h +index 5e778ab7ba..1287b9fb6e 100644 +--- a/xen/include/asm-x86/vpmu.h ++++ b/xen/include/asm-x86/vpmu.h +@@ -125,6 +125,7 @@ static inline int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content) + + extern unsigned int vpmu_mode; + extern unsigned int vpmu_features; ++extern bool opt_rtm_abort; + + /* Context switch */ + static inline void vpmu_switch_from(struct vcpu *prev) +diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h +index fbc68fa29f..2bcc5487ac 100644 +--- a/xen/include/public/arch-x86/cpufeatureset.h ++++ b/xen/include/public/arch-x86/cpufeatureset.h +@@ -242,6 +242,7 @@ XEN_CPUFEATURE(IBPB, 8*32+12) /*A IBPB support only (no IBRS, used by + /* Intel-defined CPU features, CPUID level 0x00000007:0.edx, word 9 */ + XEN_CPUFEATURE(AVX512_4VNNIW, 9*32+ 2) /*A AVX512 Neural Network Instructions */ + XEN_CPUFEATURE(AVX512_4FMAPS, 9*32+ 3) /*A AVX512 Multiply Accumulation Single Precision */ ++XEN_CPUFEATURE(TSX_FORCE_ABORT, 9*32+13) /* MSR_TSX_FORCE_ABORT.RTM_ABORT */ + XEN_CPUFEATURE(IBRSB, 9*32+26) /*A IBRS and IBPB support (used by Intel) */ + XEN_CPUFEATURE(STIBP, 9*32+27) /*A STIBP */ + XEN_CPUFEATURE(L1D_FLUSH, 9*32+28) /*S MSR_FLUSH_CMD and L1D flush. */ +-- +2.17.2 (Apple Git-113) + Index: head/emulators/xen-kernel/files/0002-x86-spec-ctrl-Extend-repoline-safey-calcuations-for-.patch =================================================================== --- head/emulators/xen-kernel/files/0002-x86-spec-ctrl-Extend-repoline-safey-calcuations-for-.patch +++ head/emulators/xen-kernel/files/0002-x86-spec-ctrl-Extend-repoline-safey-calcuations-for-.patch @@ -0,0 +1,68 @@ +From fd2a34c9655acecaaa1541dd84fc670936303175 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Fri, 3 May 2019 10:45:45 +0200 +Subject: [PATCH 2/2] x86/spec-ctrl: Extend repoline safey calcuations for + eIBRS and Atom parts + +All currently-released Atom processors are in practice retpoline-safe, because +they don't fall back to a BTB prediction on RSB underflow. + +However, an additional meaning of Enhanced IRBS is that the processor may not +be retpoline-safe. The Gemini Lake platform, based on the Goldmont Plus +microarchitecture is the first Atom processor to support eIBRS. + +Until Xen gets full eIBRS support, Gemini Lake will still be safe using +regular IBRS. 
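For readers following the reasoning: the safety calculation keys off two MSR_ARCH_CAPABILITIES bits. A minimal standalone C sketch of that test, using the bit positions from msr-index.h; retpoline_safe_sketch() is an illustrative name, not Xen's function:

    #include <stdbool.h>
    #include <stdint.h>

    #define ARCH_CAPS_IBRS_ALL (1ULL << 1)  /* Enhanced IBRS available */
    #define ARCH_CAPS_RSBA     (1ULL << 2)  /* RSB may fall back to BTB */

    static bool retpoline_safe_sketch(uint64_t caps)
    {
        /*
         * Either bit means this part, or a migration target advertising
         * it, may predict from the BTB on RSB underflow, which defeats
         * retpolines.  Model-specific whitelisting happens afterwards.
         */
        return !(caps & (ARCH_CAPS_RSBA | ARCH_CAPS_IBRS_ALL));
    }
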
+ +Signed-off-by: Andrew Cooper +Acked-by: Jan Beulich +master commit: 17f74242ccf0ce6e51c03a5860947865c0ef0dc2 +master date: 2019-03-18 16:26:40 +0000 +--- + xen/arch/x86/spec_ctrl.c | 22 +++++++++++++++++++++- + 1 file changed, 21 insertions(+), 1 deletion(-) + +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index 22bfc5a5e8..1171c02ab1 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -316,8 +316,11 @@ static bool __init retpoline_safe(uint64_t caps) + /* + * RSBA may be set by a hypervisor to indicate that we may move to a + * processor which isn't retpoline-safe. ++ * ++ * Processors offering Enhanced IBRS are not guarenteed to be ++ * repoline-safe. + */ +- if ( caps & ARCH_CAPS_RSBA ) ++ if ( caps & (ARCH_CAPS_RSBA | ARCH_CAPS_IBRS_ALL) ) + return false; + + switch ( boot_cpu_data.x86_model ) +@@ -377,6 +380,23 @@ static bool __init retpoline_safe(uint64_t caps) + case 0x9e: + return false; + ++ /* ++ * Atom processors before Goldmont Plus/Gemini Lake are retpoline-safe. ++ */ ++ case 0x1c: /* Pineview */ ++ case 0x26: /* Lincroft */ ++ case 0x27: /* Penwell */ ++ case 0x35: /* Cloverview */ ++ case 0x36: /* Cedarview */ ++ case 0x37: /* Baytrail / Valleyview (Silvermont) */ ++ case 0x4d: /* Avaton / Rangely (Silvermont) */ ++ case 0x4c: /* Cherrytrail / Brasswell */ ++ case 0x4a: /* Merrifield */ ++ case 0x5a: /* Moorefield */ ++ case 0x5c: /* Goldmont */ ++ case 0x5f: /* Denverton */ ++ return true; ++ + default: + printk("Unrecognised CPU model %#x - assuming not reptpoline safe\n", + boot_cpu_data.x86_model); +-- +2.17.2 (Apple Git-113) + Index: head/emulators/xen-kernel/files/xsa297-4.12-1.patch =================================================================== --- head/emulators/xen-kernel/files/xsa297-4.12-1.patch +++ head/emulators/xen-kernel/files/xsa297-4.12-1.patch @@ -0,0 +1,163 @@ +From: Andrew Cooper +Subject: x86/spec-ctrl: Reposition the XPTI command line parsing logic + +It has ended up in the middle of the mitigation calculation logic. Move it to +be beside the other command line parsing. + +No functional change. + +Signed-off-by: Andrew Cooper +Acked-by: Jan Beulich + +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index 1171c02..99310c8 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -167,6 +167,73 @@ static int __init parse_spec_ctrl(const char *s) + } + custom_param("spec-ctrl", parse_spec_ctrl); + ++int8_t __read_mostly opt_xpti_hwdom = -1; ++int8_t __read_mostly opt_xpti_domu = -1; ++ ++static __init void xpti_init_default(uint64_t caps) ++{ ++ if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD ) ++ caps = ARCH_CAPS_RDCL_NO; ++ ++ if ( caps & ARCH_CAPS_RDCL_NO ) ++ { ++ if ( opt_xpti_hwdom < 0 ) ++ opt_xpti_hwdom = 0; ++ if ( opt_xpti_domu < 0 ) ++ opt_xpti_domu = 0; ++ } ++ else ++ { ++ if ( opt_xpti_hwdom < 0 ) ++ opt_xpti_hwdom = 1; ++ if ( opt_xpti_domu < 0 ) ++ opt_xpti_domu = 1; ++ } ++} ++ ++static __init int parse_xpti(const char *s) ++{ ++ const char *ss; ++ int val, rc = 0; ++ ++ /* Interpret 'xpti' alone in its positive boolean form. 
*/ ++ if ( *s == '\0' ) ++ opt_xpti_hwdom = opt_xpti_domu = 1; ++ ++ do { ++ ss = strchr(s, ','); ++ if ( !ss ) ++ ss = strchr(s, '\0'); ++ ++ switch ( parse_bool(s, ss) ) ++ { ++ case 0: ++ opt_xpti_hwdom = opt_xpti_domu = 0; ++ break; ++ ++ case 1: ++ opt_xpti_hwdom = opt_xpti_domu = 1; ++ break; ++ ++ default: ++ if ( !strcmp(s, "default") ) ++ opt_xpti_hwdom = opt_xpti_domu = -1; ++ else if ( (val = parse_boolean("dom0", s, ss)) >= 0 ) ++ opt_xpti_hwdom = val; ++ else if ( (val = parse_boolean("domu", s, ss)) >= 0 ) ++ opt_xpti_domu = val; ++ else if ( *s ) ++ rc = -EINVAL; ++ break; ++ } ++ ++ s = ss + 1; ++ } while ( *ss ); ++ ++ return rc; ++} ++custom_param("xpti", parse_xpti); ++ + int8_t __read_mostly opt_pv_l1tf_hwdom = -1; + int8_t __read_mostly opt_pv_l1tf_domu = -1; + +@@ -627,73 +694,6 @@ static __init void l1tf_calculations(uint64_t caps) + : (3ul << (paddr_bits - 2)))); + } + +-int8_t __read_mostly opt_xpti_hwdom = -1; +-int8_t __read_mostly opt_xpti_domu = -1; +- +-static __init void xpti_init_default(uint64_t caps) +-{ +- if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD ) +- caps = ARCH_CAPS_RDCL_NO; +- +- if ( caps & ARCH_CAPS_RDCL_NO ) +- { +- if ( opt_xpti_hwdom < 0 ) +- opt_xpti_hwdom = 0; +- if ( opt_xpti_domu < 0 ) +- opt_xpti_domu = 0; +- } +- else +- { +- if ( opt_xpti_hwdom < 0 ) +- opt_xpti_hwdom = 1; +- if ( opt_xpti_domu < 0 ) +- opt_xpti_domu = 1; +- } +-} +- +-static __init int parse_xpti(const char *s) +-{ +- const char *ss; +- int val, rc = 0; +- +- /* Interpret 'xpti' alone in its positive boolean form. */ +- if ( *s == '\0' ) +- opt_xpti_hwdom = opt_xpti_domu = 1; +- +- do { +- ss = strchr(s, ','); +- if ( !ss ) +- ss = strchr(s, '\0'); +- +- switch ( parse_bool(s, ss) ) +- { +- case 0: +- opt_xpti_hwdom = opt_xpti_domu = 0; +- break; +- +- case 1: +- opt_xpti_hwdom = opt_xpti_domu = 1; +- break; +- +- default: +- if ( !strcmp(s, "default") ) +- opt_xpti_hwdom = opt_xpti_domu = -1; +- else if ( (val = parse_boolean("dom0", s, ss)) >= 0 ) +- opt_xpti_hwdom = val; +- else if ( (val = parse_boolean("domu", s, ss)) >= 0 ) +- opt_xpti_domu = val; +- else if ( *s ) +- rc = -EINVAL; +- break; +- } +- +- s = ss + 1; +- } while ( *ss ); +- +- return rc; +-} +-custom_param("xpti", parse_xpti); +- + void __init init_speculation_mitigations(void) + { + enum ind_thunk thunk = THUNK_DEFAULT; Index: head/emulators/xen-kernel/files/xsa297-4.12-2.patch =================================================================== --- head/emulators/xen-kernel/files/xsa297-4.12-2.patch +++ head/emulators/xen-kernel/files/xsa297-4.12-2.patch @@ -0,0 +1,54 @@ +From: Andrew Cooper +Subject: x86/msr: Definitions for MSR_INTEL_CORE_THREAD_COUNT + +This is a model specific register which details the current configuration +cores and threads in the package. Because of how Hyperthread and Core +configuration works works in firmware, the MSR it is de-facto constant and +will remain unchanged until the next system reset. + +It is a read only MSR (so unilaterally reject writes), but for now retain its +leaky-on-read properties. Further CPUID/MSR work is required before we can +start virtualising a consistent topology to the guest, and retaining the old +behaviour is the safest course of action. 
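The masks added below split the MSR value into a thread count (bits 15:0) and a core count (bits 31:16). A minimal sketch of the decode, mirroring how the next patch in this series consumes it; smt_active_sketch() and the raw 'val' from a prior rdmsr are illustrative:

    #include <stdbool.h>
    #include <stdint.h>

    #define MSR_CTC_THREAD_MASK 0x0000ffffULL
    #define MSR_CTC_CORE_MASK   0xffff0000ULL

    static bool smt_active_sketch(uint64_t val)
    {
        unsigned int threads = val & MSR_CTC_THREAD_MASK;
        unsigned int cores   = (val & MSR_CTC_CORE_MASK) >> 16;

        /* More threads than cores in the package means SMT is enabled. */
        return threads != cores;
    }
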
+ +Signed-off-by: Andrew Cooper +Acked-by: Jan Beulich + +diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c +index 4df4a59..a7f67d9 100644 +--- a/xen/arch/x86/msr.c ++++ b/xen/arch/x86/msr.c +@@ -200,6 +200,10 @@ int guest_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val) + ARRAY_SIZE(msrs->dr_mask))]; + break; + ++ /* ++ * TODO: Implement when we have better topology representation. ++ case MSR_INTEL_CORE_THREAD_COUNT: ++ */ + default: + return X86EMUL_UNHANDLEABLE; + } +@@ -229,6 +233,7 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val) + { + uint64_t rsvd; + ++ case MSR_INTEL_CORE_THREAD_COUNT: + case MSR_INTEL_PLATFORM_INFO: + case MSR_ARCH_CAPABILITIES: + /* Read-only */ +diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h +index 11512d4..389f95f 100644 +--- a/xen/include/asm-x86/msr-index.h ++++ b/xen/include/asm-x86/msr-index.h +@@ -32,6 +32,10 @@ + #define EFER_KNOWN_MASK (EFER_SCE | EFER_LME | EFER_LMA | EFER_NX | \ + EFER_SVME | EFER_FFXSE) + ++#define MSR_INTEL_CORE_THREAD_COUNT 0x00000035 ++#define MSR_CTC_THREAD_MASK 0x0000ffff ++#define MSR_CTC_CORE_MASK 0xffff0000 ++ + /* Speculation Controls. */ + #define MSR_SPEC_CTRL 0x00000048 + #define SPEC_CTRL_IBRS (_AC(1, ULL) << 0) Index: head/emulators/xen-kernel/files/xsa297-4.12-3.patch =================================================================== --- head/emulators/xen-kernel/files/xsa297-4.12-3.patch +++ head/emulators/xen-kernel/files/xsa297-4.12-3.patch @@ -0,0 +1,109 @@ +From: Andrew Cooper +Subject: x86/boot: Detect the firmware SMT setting correctly on Intel hardware + +While boot_cpu_data.x86_num_siblings is an accurate value to use on AMD +hardware, it isn't on Intel when the user has disabled Hyperthreading in the +firmware. As a result, a user which has chosen to disable HT still gets +nagged on L1TF-vulnerable hardware when they haven't chosen an explicit +smt= setting. + +Make use of the largely-undocumented MSR_INTEL_CORE_THREAD_COUNT which in +practice exists since Nehalem, when booting on real hardware. Fall back to +using the ACPI table APIC IDs. + +While adjusting this logic, fix a latent bug in amd_get_topology(). The +thread count field in CPUID.0x8000001e.ebx is documented as 8 bits wide, +rather than 2 bits wide. + +Signed-off-by: Andrew Cooper +Acked-by: Jan Beulich + +diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c +index c790416..b1debac 100644 +--- a/xen/arch/x86/cpu/amd.c ++++ b/xen/arch/x86/cpu/amd.c +@@ -507,7 +507,7 @@ static void amd_get_topology(struct cpuinfo_x86 *c) + u32 eax, ebx, ecx, edx; + + cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); +- c->x86_num_siblings = ((ebx >> 8) & 0x3) + 1; ++ c->x86_num_siblings = ((ebx >> 8) & 0xff) + 1; + + if (c->x86 < 0x17) + c->compute_unit_id = ebx & 0xFF; +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index 99310c8..e49ab3f 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -368,6 +368,45 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) + #endif + } + ++static bool __init check_smt_enabled(void) ++{ ++ uint64_t val; ++ unsigned int cpu; ++ ++ /* ++ * x86_num_siblings defaults to 1 in the absence of other information, and ++ * is adjusted based on other topology information found in CPUID leaves. ++ * ++ * On AMD hardware, it will be the current SMT configuration. On Intel ++ * hardware, it will represent the maximum capability, rather than the ++ * current configuration. 
++ */ ++ if ( boot_cpu_data.x86_num_siblings < 2 ) ++ return false; ++ ++ /* ++ * Intel Nehalem and later hardware does have an MSR which reports the ++ * current count of cores/threads in the package. ++ * ++ * At the time of writing, it is almost completely undocumented, so isn't ++ * virtualised reliably. ++ */ ++ if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && !cpu_has_hypervisor && ++ !rdmsr_safe(MSR_INTEL_CORE_THREAD_COUNT, val) ) ++ return (MASK_EXTR(val, MSR_CTC_CORE_MASK) != ++ MASK_EXTR(val, MSR_CTC_THREAD_MASK)); ++ ++ /* ++ * Search over the CPUs reported in the ACPI tables. Any whose APIC ID ++ * has a non-zero thread id component indicates that SMT is active. ++ */ ++ for_each_present_cpu ( cpu ) ++ if ( x86_cpu_to_apicid[cpu] & (boot_cpu_data.x86_num_siblings - 1) ) ++ return true; ++ ++ return false; ++} ++ + /* Calculate whether Retpoline is known-safe on this CPU. */ + static bool __init retpoline_safe(uint64_t caps) + { +@@ -697,12 +736,14 @@ static __init void l1tf_calculations(uint64_t caps) + void __init init_speculation_mitigations(void) + { + enum ind_thunk thunk = THUNK_DEFAULT; +- bool use_spec_ctrl = false, ibrs = false; ++ bool use_spec_ctrl = false, ibrs = false, hw_smt_enabled; + uint64_t caps = 0; + + if ( boot_cpu_has(X86_FEATURE_ARCH_CAPS) ) + rdmsrl(MSR_ARCH_CAPABILITIES, caps); + ++ hw_smt_enabled = check_smt_enabled(); ++ + /* + * Has the user specified any custom BTI mitigations? If so, follow their + * instructions exactly and disable all heuristics. +@@ -873,8 +914,7 @@ void __init init_speculation_mitigations(void) + * However, if we are on affected hardware, with HT enabled, and the user + * hasn't explicitly chosen whether to use HT or not, nag them to do so. + */ +- if ( opt_smt == -1 && cpu_has_bug_l1tf && !pv_shim && +- boot_cpu_data.x86_num_siblings > 1 ) ++ if ( opt_smt == -1 && cpu_has_bug_l1tf && !pv_shim && hw_smt_enabled ) + warning_add( + "Booted on L1TF-vulnerable hardware with SMT/Hyperthreading\n" + "enabled. Please assess your configuration and choose an\n" Index: head/emulators/xen-kernel/files/xsa297-4.12-4.patch =================================================================== --- head/emulators/xen-kernel/files/xsa297-4.12-4.patch +++ head/emulators/xen-kernel/files/xsa297-4.12-4.patch @@ -0,0 +1,55 @@ +From: Andrew Cooper +Subject: x86/spec-ctrl: Misc non-functional cleanup + + * Identify BTI in the spec_ctrl_{enter,exit}_idle() comments, as other + mitigations will shortly appear. + * Use alternative_input() and cover the lack of memory cobber with a further + barrier. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich + +diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h +index 779da2b..20ee112 100644 +--- a/xen/include/asm-x86/spec_ctrl.h ++++ b/xen/include/asm-x86/spec_ctrl.h +@@ -68,6 +68,8 @@ static always_inline void spec_ctrl_enter_idle(struct cpu_info *info) + uint32_t val = 0; + + /* ++ * Branch Target Injection: ++ * + * Latch the new shadow value, then enable shadowing, then update the MSR. + * There are no SMP issues here; only local processor ordering concerns. 
+ */ +@@ -75,8 +77,9 @@ static always_inline void spec_ctrl_enter_idle(struct cpu_info *info) + barrier(); + info->spec_ctrl_flags |= SCF_use_shadow; + barrier(); +- asm volatile ( ALTERNATIVE("", "wrmsr", X86_FEATURE_SC_MSR_IDLE) +- :: "a" (val), "c" (MSR_SPEC_CTRL), "d" (0) : "memory" ); ++ alternative_input("", "wrmsr", X86_FEATURE_SC_MSR_IDLE, ++ "a" (val), "c" (MSR_SPEC_CTRL), "d" (0)); ++ barrier(); + } + + /* WARNING! `ret`, `call *`, `jmp *` not safe before this call. */ +@@ -85,13 +88,16 @@ static always_inline void spec_ctrl_exit_idle(struct cpu_info *info) + uint32_t val = info->xen_spec_ctrl; + + /* ++ * Branch Target Injection: ++ * + * Disable shadowing before updating the MSR. There are no SMP issues + * here; only local processor ordering concerns. + */ + info->spec_ctrl_flags &= ~SCF_use_shadow; + barrier(); +- asm volatile ( ALTERNATIVE("", "wrmsr", X86_FEATURE_SC_MSR_IDLE) +- :: "a" (val), "c" (MSR_SPEC_CTRL), "d" (0) : "memory" ); ++ alternative_input("", "wrmsr", X86_FEATURE_SC_MSR_IDLE, ++ "a" (val), "c" (MSR_SPEC_CTRL), "d" (0)); ++ barrier(); + } + + #endif /* __ASSEMBLY__ */ Index: head/emulators/xen-kernel/files/xsa297-4.12-5.patch =================================================================== --- head/emulators/xen-kernel/files/xsa297-4.12-5.patch +++ head/emulators/xen-kernel/files/xsa297-4.12-5.patch @@ -0,0 +1,147 @@ +From: Andrew Cooper +Subject: x86/spec-ctrl: CPUID/MSR definitions for Microarchitectural Data + Sampling + +The MD_CLEAR feature can be automatically offered to guests. No +infrastructure is needed in Xen to support the guest making use of it. + +This is part of XSA-297, CVE-2018-12126, CVE-2018-12127, CVE-2018-12130, CVE-2019-11091. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich + +diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc +index 6db82f3..f80d8d8 100644 +--- a/docs/misc/xen-command-line.pandoc ++++ b/docs/misc/xen-command-line.pandoc +@@ -483,7 +483,7 @@ accounting for hardware capabilities as enumerated via CPUID. + + Currently accepted: + +-The Speculation Control hardware features `ibrsb`, `stibp`, `ibpb`, ++The Speculation Control hardware features `md-clear`, `ibrsb`, `stibp`, `ibpb`, + `l1d-flush` and `ssbd` are used by default if available and applicable. They can + be ignored, e.g. `no-ibrsb`, at which point Xen won't use them itself, and + won't offer them to guests. 
+diff --git a/tools/libxl/libxl_cpuid.c b/tools/libxl/libxl_cpuid.c +index 52e16c2..5a1702d 100644 +--- a/tools/libxl/libxl_cpuid.c ++++ b/tools/libxl/libxl_cpuid.c +@@ -202,6 +202,7 @@ int libxl_cpuid_parse_config(libxl_cpuid_policy_list *cpuid, const char* str) + + {"avx512-4vnniw",0x00000007, 0, CPUID_REG_EDX, 2, 1}, + {"avx512-4fmaps",0x00000007, 0, CPUID_REG_EDX, 3, 1}, ++ {"md-clear", 0x00000007, 0, CPUID_REG_EDX, 10, 1}, + {"ibrsb", 0x00000007, 0, CPUID_REG_EDX, 26, 1}, + {"stibp", 0x00000007, 0, CPUID_REG_EDX, 27, 1}, + {"l1d-flush", 0x00000007, 0, CPUID_REG_EDX, 28, 1}, +diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c +index d87a72e..f67ecd3 100644 +--- a/tools/misc/xen-cpuid.c ++++ b/tools/misc/xen-cpuid.c +@@ -146,6 +146,7 @@ static const char *str_7d0[32] = + { + [ 2] = "avx512_4vnniw", [ 3] = "avx512_4fmaps", + ++ [10] = "md-clear", + /* 12 */ [13] = "tsx-force-abort", + + [26] = "ibrsb", [27] = "stibp", +diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c +index ab0aab6..3efad9c 100644 +--- a/xen/arch/x86/cpuid.c ++++ b/xen/arch/x86/cpuid.c +@@ -29,7 +29,12 @@ static int __init parse_xen_cpuid(const char *s) + if ( !ss ) + ss = strchr(s, '\0'); + +- if ( (val = parse_boolean("ibpb", s, ss)) >= 0 ) ++ if ( (val = parse_boolean("md-clear", s, ss)) >= 0 ) ++ { ++ if ( !val ) ++ setup_clear_cpu_cap(X86_FEATURE_MD_CLEAR); ++ } ++ else if ( (val = parse_boolean("ibpb", s, ss)) >= 0 ) + { + if ( !val ) + setup_clear_cpu_cap(X86_FEATURE_IBPB); +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index e49ab3f..a573b02 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -291,17 +291,19 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) + printk("Speculative mitigation facilities:\n"); + + /* Hardware features which pertain to speculative mitigations. */ +- printk(" Hardware features:%s%s%s%s%s%s%s%s%s%s\n", ++ printk(" Hardware features:%s%s%s%s%s%s%s%s%s%s%s%s\n", + (_7d0 & cpufeat_mask(X86_FEATURE_IBRSB)) ? " IBRS/IBPB" : "", + (_7d0 & cpufeat_mask(X86_FEATURE_STIBP)) ? " STIBP" : "", + (_7d0 & cpufeat_mask(X86_FEATURE_L1D_FLUSH)) ? " L1D_FLUSH" : "", + (_7d0 & cpufeat_mask(X86_FEATURE_SSBD)) ? " SSBD" : "", ++ (_7d0 & cpufeat_mask(X86_FEATURE_MD_CLEAR)) ? " MD_CLEAR" : "", + (e8b & cpufeat_mask(X86_FEATURE_IBPB)) ? " IBPB" : "", + (caps & ARCH_CAPS_IBRS_ALL) ? " IBRS_ALL" : "", + (caps & ARCH_CAPS_RDCL_NO) ? " RDCL_NO" : "", + (caps & ARCH_CAPS_RSBA) ? " RSBA" : "", + (caps & ARCH_CAPS_SKIP_L1DFL) ? " SKIP_L1DFL": "", +- (caps & ARCH_CAPS_SSB_NO) ? " SSB_NO" : ""); ++ (caps & ARCH_CAPS_SSB_NO) ? " SSB_NO" : "", ++ (caps & ARCH_CAPS_MDS_NO) ? " MDS_NO" : ""); + + /* Compiled-in support which pertains to mitigations. */ + if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) || IS_ENABLED(CONFIG_SHADOW_PAGING) ) +@@ -339,23 +341,25 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) + * mitigation support for guests. + */ + #ifdef CONFIG_HVM +- printk(" Support for HVM VMs:%s%s%s%s\n", ++ printk(" Support for HVM VMs:%s%s%s%s%s\n", + (boot_cpu_has(X86_FEATURE_SC_MSR_HVM) || + boot_cpu_has(X86_FEATURE_SC_RSB_HVM) || + opt_eager_fpu) ? "" : " None", + boot_cpu_has(X86_FEATURE_SC_MSR_HVM) ? " MSR_SPEC_CTRL" : "", + boot_cpu_has(X86_FEATURE_SC_RSB_HVM) ? " RSB" : "", +- opt_eager_fpu ? " EAGER_FPU" : ""); ++ opt_eager_fpu ? " EAGER_FPU" : "", ++ boot_cpu_has(X86_FEATURE_MD_CLEAR) ? 
" MD_CLEAR" : ""); + + #endif + #ifdef CONFIG_PV +- printk(" Support for PV VMs:%s%s%s%s\n", ++ printk(" Support for PV VMs:%s%s%s%s%s\n", + (boot_cpu_has(X86_FEATURE_SC_MSR_PV) || + boot_cpu_has(X86_FEATURE_SC_RSB_PV) || + opt_eager_fpu) ? "" : " None", + boot_cpu_has(X86_FEATURE_SC_MSR_PV) ? " MSR_SPEC_CTRL" : "", + boot_cpu_has(X86_FEATURE_SC_RSB_PV) ? " RSB" : "", +- opt_eager_fpu ? " EAGER_FPU" : ""); ++ opt_eager_fpu ? " EAGER_FPU" : "", ++ boot_cpu_has(X86_FEATURE_MD_CLEAR) ? " MD_CLEAR" : ""); + + printk(" XPTI (64-bit PV only): Dom0 %s, DomU %s (with%s PCID)\n", + opt_xpti_hwdom ? "enabled" : "disabled", +diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h +index 389f95f..637259b 100644 +--- a/xen/include/asm-x86/msr-index.h ++++ b/xen/include/asm-x86/msr-index.h +@@ -51,6 +51,7 @@ + #define ARCH_CAPS_RSBA (_AC(1, ULL) << 2) + #define ARCH_CAPS_SKIP_L1DFL (_AC(1, ULL) << 3) + #define ARCH_CAPS_SSB_NO (_AC(1, ULL) << 4) ++#define ARCH_CAPS_MDS_NO (_AC(1, ULL) << 5) + + #define MSR_FLUSH_CMD 0x0000010b + #define FLUSH_CMD_L1D (_AC(1, ULL) << 0) +diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h +index 2bcc548..55231d4 100644 +--- a/xen/include/public/arch-x86/cpufeatureset.h ++++ b/xen/include/public/arch-x86/cpufeatureset.h +@@ -242,6 +242,7 @@ XEN_CPUFEATURE(IBPB, 8*32+12) /*A IBPB support only (no IBRS, used by + /* Intel-defined CPU features, CPUID level 0x00000007:0.edx, word 9 */ + XEN_CPUFEATURE(AVX512_4VNNIW, 9*32+ 2) /*A AVX512 Neural Network Instructions */ + XEN_CPUFEATURE(AVX512_4FMAPS, 9*32+ 3) /*A AVX512 Multiply Accumulation Single Precision */ ++XEN_CPUFEATURE(MD_CLEAR, 9*32+10) /*A VERW clears microarchitectural buffers */ + XEN_CPUFEATURE(TSX_FORCE_ABORT, 9*32+13) /* MSR_TSX_FORCE_ABORT.RTM_ABORT */ + XEN_CPUFEATURE(IBRSB, 9*32+26) /*A IBRS and IBPB support (used by Intel) */ + XEN_CPUFEATURE(STIBP, 9*32+27) /*A STIBP */ Index: head/emulators/xen-kernel/files/xsa297-4.12-6.patch =================================================================== --- head/emulators/xen-kernel/files/xsa297-4.12-6.patch +++ head/emulators/xen-kernel/files/xsa297-4.12-6.patch @@ -0,0 +1,134 @@ +From: Andrew Cooper +Subject: x86/spec-ctrl: Infrastructure to use VERW to flush pipeline buffers + +Three synthetic features are introduced, as we need individual control of +each, depending on circumstances. A later change will enable them at +appropriate points. + +The verw_sel field doesn't strictly need to live in struct cpu_info. It lives +there because there is a convenient hole it can fill, and it reduces the +complexity of the SPEC_CTRL_EXIT_TO_{PV,HVM} assembly by avoiding the need for +any temporary stack maintenance. + +This is part of XSA-297, CVE-2018-12126, CVE-2018-12127, CVE-2018-12130, CVE-2019-11091. 
+ +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich + +diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c +index 052228c..33930ce 100644 +--- a/xen/arch/x86/x86_64/asm-offsets.c ++++ b/xen/arch/x86/x86_64/asm-offsets.c +@@ -110,6 +110,7 @@ void __dummy__(void) + BLANK(); + + OFFSET(CPUINFO_guest_cpu_user_regs, struct cpu_info, guest_cpu_user_regs); ++ OFFSET(CPUINFO_verw_sel, struct cpu_info, verw_sel); + OFFSET(CPUINFO_current_vcpu, struct cpu_info, current_vcpu); + OFFSET(CPUINFO_cr4, struct cpu_info, cr4); + OFFSET(CPUINFO_xen_cr3, struct cpu_info, xen_cr3); +diff --git a/xen/include/asm-x86/cpufeatures.h b/xen/include/asm-x86/cpufeatures.h +index 0c06274..ba55245 100644 +--- a/xen/include/asm-x86/cpufeatures.h ++++ b/xen/include/asm-x86/cpufeatures.h +@@ -31,3 +31,6 @@ XEN_CPUFEATURE(SC_RSB_PV, (FSCAPINTS+0)*32+18) /* RSB overwrite needed for + XEN_CPUFEATURE(SC_RSB_HVM, (FSCAPINTS+0)*32+19) /* RSB overwrite needed for HVM */ + XEN_CPUFEATURE(SC_MSR_IDLE, (FSCAPINTS+0)*32+21) /* (SC_MSR_PV || SC_MSR_HVM) && default_xen_spec_ctrl */ + XEN_CPUFEATURE(XEN_LBR, (FSCAPINTS+0)*32+22) /* Xen uses MSR_DEBUGCTL.LBR */ ++XEN_CPUFEATURE(SC_VERW_PV, (FSCAPINTS+0)*32+23) /* VERW used by Xen for PV */ ++XEN_CPUFEATURE(SC_VERW_HVM, (FSCAPINTS+0)*32+24) /* VERW used by Xen for HVM */ ++XEN_CPUFEATURE(SC_VERW_IDLE, (FSCAPINTS+0)*32+25) /* VERW used by Xen for idle */ +diff --git a/xen/include/asm-x86/current.h b/xen/include/asm-x86/current.h +index 5bd64b2..f3508c3 100644 +--- a/xen/include/asm-x86/current.h ++++ b/xen/include/asm-x86/current.h +@@ -38,6 +38,7 @@ struct vcpu; + struct cpu_info { + struct cpu_user_regs guest_cpu_user_regs; + unsigned int processor_id; ++ unsigned int verw_sel; + struct vcpu *current_vcpu; + unsigned long per_cpu_offset; + unsigned long cr4; +diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h +index 20ee112..ba03bb4 100644 +--- a/xen/include/asm-x86/spec_ctrl.h ++++ b/xen/include/asm-x86/spec_ctrl.h +@@ -60,6 +60,13 @@ static inline void init_shadow_spec_ctrl_state(void) + info->shadow_spec_ctrl = 0; + info->xen_spec_ctrl = default_xen_spec_ctrl; + info->spec_ctrl_flags = default_spec_ctrl_flags; ++ ++ /* ++ * For least latency, the VERW selector should be a writeable data ++ * descriptor resident in the cache. __HYPERVISOR_DS32 shares a cache ++ * line with __HYPERVISOR_CS, so is expected to be very cache-hot. ++ */ ++ info->verw_sel = __HYPERVISOR_DS32; + } + + /* WARNING! `ret`, `call *`, `jmp *` not safe after this call. */ +@@ -80,6 +87,22 @@ static always_inline void spec_ctrl_enter_idle(struct cpu_info *info) + alternative_input("", "wrmsr", X86_FEATURE_SC_MSR_IDLE, + "a" (val), "c" (MSR_SPEC_CTRL), "d" (0)); + barrier(); ++ ++ /* ++ * Microarchitectural Store Buffer Data Sampling: ++ * ++ * On vulnerable systems, store buffer entries are statically partitioned ++ * between active threads. When entering idle, our store buffer entries ++ * are re-partitioned to allow the other threads to use them. ++ * ++ * Flush the buffers to ensure that no sensitive data of ours can be ++ * leaked by a sibling after it gets our store buffer entries. ++ * ++ * Note: VERW must be encoded with a memory operand, as it is only that ++ * form which causes a flush. ++ */ ++ alternative_input("", "verw %[sel]", X86_FEATURE_SC_VERW_IDLE, ++ [sel] "m" (info->verw_sel)); + } + + /* WARNING! `ret`, `call *`, `jmp *` not safe before this call. 
*/ +@@ -98,6 +121,17 @@ static always_inline void spec_ctrl_exit_idle(struct cpu_info *info) + alternative_input("", "wrmsr", X86_FEATURE_SC_MSR_IDLE, + "a" (val), "c" (MSR_SPEC_CTRL), "d" (0)); + barrier(); ++ ++ /* ++ * Microarchitectural Store Buffer Data Sampling: ++ * ++ * On vulnerable systems, store buffer entries are statically partitioned ++ * between active threads. When exiting idle, the other threads store ++ * buffer entries are re-partitioned to give us some. ++ * ++ * We now have store buffer entries with stale data from sibling threads. ++ * A flush if necessary will be performed on the return to guest path. ++ */ + } + + #endif /* __ASSEMBLY__ */ +diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h +index 803f7ce..c60093b 100644 +--- a/xen/include/asm-x86/spec_ctrl_asm.h ++++ b/xen/include/asm-x86/spec_ctrl_asm.h +@@ -241,12 +241,16 @@ + /* Use when exiting to PV guest context. */ + #define SPEC_CTRL_EXIT_TO_PV \ + ALTERNATIVE "", \ +- DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV ++ DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV; \ ++ ALTERNATIVE "", __stringify(verw CPUINFO_verw_sel(%rsp)), \ ++ X86_FEATURE_SC_VERW_PV + + /* Use when exiting to HVM guest context. */ + #define SPEC_CTRL_EXIT_TO_HVM \ + ALTERNATIVE "", \ +- DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_HVM ++ DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_HVM; \ ++ ALTERNATIVE "", __stringify(verw CPUINFO_verw_sel(%rsp)), \ ++ X86_FEATURE_SC_VERW_HVM + + /* + * Use in IST interrupt/exception context. May interrupt Xen or PV context. Index: head/emulators/xen-kernel/files/xsa297-4.12-7.patch =================================================================== --- head/emulators/xen-kernel/files/xsa297-4.12-7.patch +++ head/emulators/xen-kernel/files/xsa297-4.12-7.patch @@ -0,0 +1,316 @@ +From: Andrew Cooper +Subject: x86/spec-ctrl: Introduce options to control VERW flushing + +The Microarchitectural Data Sampling vulnerability is split into categories +with subtly different properties: + + MLPDS - Microarchitectural Load Port Data Sampling + MSBDS - Microarchitectural Store Buffer Data Sampling + MFBDS - Microarchitectural Fill Buffer Data Sampling + MDSUM - Microarchitectural Data Sampling Uncacheable Memory + +MDSUM is a special case of the other three, and isn't distinguished further. + +These issues pertain to three microarchitectural buffers. The Load Ports, the +Store Buffers and the Fill Buffers. Each of these structures are flushed by +the new enhanced VERW functionality, but the conditions under which flushing +is necessary vary. + +For this concise overview of the issues and default logic, the abbreviations +SP (Store Port), FB (Fill Buffer), LP (Load Port) and HT (Hyperthreading) are +used for brevity: + + * Vulnerable hardware is divided into two categories - parts which suffer + from SP only, and parts with any other combination of vulnerabilities. + + * SP only has an HT interaction when the thread goes idle, due to the static + partitioning of resources. LP and FB have HT interactions at all points, + due to the competitive sharing of resources. All issues potentially leak + data across the return-to-guest transition. + + * The microcode which implements VERW flushing also extends MSR_FLUSH_CMD, so + we don't need to do both on the HVM return-to-guest path. However, some + parts are not vulnerable to L1TF (therefore have no MSR_FLUSH_CMD), but are + vulnerable to MDS, so do require VERW on the HVM path. 
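For orientation, the per-model table added below is only consulted after two coarse tests. A compact C sketch of that gate, using the ARCH_CAPS_MDS_NO bit introduced in the previous patch; maybe_mds_vulnerable_sketch() is an illustrative name:

    #include <stdbool.h>
    #include <stdint.h>

    #define ARCH_CAPS_MDS_NO (1ULL << 5)

    static bool maybe_mds_vulnerable_sketch(bool vendor_intel,
                                            unsigned int family,
                                            uint64_t caps)
    {
        /* MDS is only known to affect Intel Family 6 parts at this time. */
        if ( !vendor_intel || family != 6 )
            return false;

        /* Hardware advertising MDS_NO is stated to be not vulnerable. */
        if ( caps & ARCH_CAPS_MDS_NO )
            return false;

        return true;  /* refined by model/stepping in mds_calculations() */
    }
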
+ +Note that we deliberately support mds=1 even without MD_CLEAR in case the +microcode has been updated but the feature bit not exposed. + +This is part of XSA-297, CVE-2018-12126, CVE-2018-12127, CVE-2018-12130, CVE-2019-11091. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich + +diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc +index f80d8d8..85081fd 100644 +--- a/docs/misc/xen-command-line.pandoc ++++ b/docs/misc/xen-command-line.pandoc +@@ -1895,7 +1895,7 @@ not be able to control the state of the mitigation. + By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`). + + ### spec-ctrl (x86) +-> `= List of [ , xen=, {pv,hvm,msr-sc,rsb}=, ++> `= List of [ , xen=, {pv,hvm,msr-sc,rsb,md-clear}=, + > bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,eager-fpu, + > l1d-flush}= ]` + +@@ -1919,9 +1919,10 @@ in place for guests to use. + + Use of a positive boolean value for either of these options is invalid. + +-The booleans `pv=`, `hvm=`, `msr-sc=` and `rsb=` offer fine grained control +-over the alternative blocks used by Xen. These impact Xen's ability to +-protect itself, and Xen's ability to virtualise support for guests to use. ++The booleans `pv=`, `hvm=`, `msr-sc=`, `rsb=` and `md-clear=` offer fine ++grained control over the alternative blocks used by Xen. These impact Xen's ++ability to protect itself, and Xen's ability to virtualise support for guests ++to use. + + * `pv=` and `hvm=` offer control over all suboptions for PV and HVM guests + respectively. +@@ -1930,6 +1931,11 @@ protect itself, and Xen's ability to virtualise support for guests to use. + guests and if disabled, guests will be unable to use IBRS/STIBP/SSBD/etc. + * `rsb=` offers control over whether to overwrite the Return Stack Buffer / + Return Address Stack on entry to Xen. ++* `md-clear=` offers control over whether to use VERW to flush ++ microarchitectural buffers on idle and exit from Xen. *Note: For ++ compatibility with development versions of this fix, `mds=` is also accepted ++ on Xen 4.12 and earlier as an alias. Consult vendor documentation in ++ preference to here.* + + If Xen was compiled with INDIRECT_THUNK support, `bti-thunk=` can be used to + select which of the thunks gets patched into the `__x86_indirect_thunk_%reg` +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index a573b02..0509ac8 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -35,6 +35,8 @@ static bool __initdata opt_msr_sc_pv = true; + static bool __initdata opt_msr_sc_hvm = true; + static bool __initdata opt_rsb_pv = true; + static bool __initdata opt_rsb_hvm = true; ++static int8_t __initdata opt_md_clear_pv = -1; ++static int8_t __initdata opt_md_clear_hvm = -1; + + /* Cmdline controls for Xen's speculative settings. */ + static enum ind_thunk { +@@ -59,6 +61,9 @@ paddr_t __read_mostly l1tf_addr_mask, __read_mostly l1tf_safe_maddr; + static bool __initdata cpu_has_bug_l1tf; + static unsigned int __initdata l1d_maxphysaddr; + ++static bool __initdata cpu_has_bug_msbds_only; /* => minimal HT impact. */ ++static bool __initdata cpu_has_bug_mds; /* Any other M{LP,SB,FB}DS combination. 
*/ ++ + static int __init parse_spec_ctrl(const char *s) + { + const char *ss; +@@ -94,6 +99,8 @@ static int __init parse_spec_ctrl(const char *s) + disable_common: + opt_rsb_pv = false; + opt_rsb_hvm = false; ++ opt_md_clear_pv = 0; ++ opt_md_clear_hvm = 0; + + opt_thunk = THUNK_JMP; + opt_ibrs = 0; +@@ -116,11 +123,13 @@ static int __init parse_spec_ctrl(const char *s) + { + opt_msr_sc_pv = val; + opt_rsb_pv = val; ++ opt_md_clear_pv = val; + } + else if ( (val = parse_boolean("hvm", s, ss)) >= 0 ) + { + opt_msr_sc_hvm = val; + opt_rsb_hvm = val; ++ opt_md_clear_hvm = val; + } + else if ( (val = parse_boolean("msr-sc", s, ss)) >= 0 ) + { +@@ -132,6 +141,12 @@ static int __init parse_spec_ctrl(const char *s) + opt_rsb_pv = val; + opt_rsb_hvm = val; + } ++ else if ( (val = parse_boolean("md-clear", s, ss)) >= 0 || ++ (val = parse_boolean("mds", s, ss)) >= 0 ) ++ { ++ opt_md_clear_pv = val; ++ opt_md_clear_hvm = val; ++ } + + /* Xen's speculative sidechannel mitigation settings. */ + else if ( !strncmp(s, "bti-thunk=", 10) ) +@@ -317,7 +332,7 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) + "\n"); + + /* Settings for Xen's protection, irrespective of guests. */ +- printk(" Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s, Other:%s%s\n", ++ printk(" Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s, Other:%s%s%s\n", + thunk == THUNK_NONE ? "N/A" : + thunk == THUNK_RETPOLINE ? "RETPOLINE" : + thunk == THUNK_LFENCE ? "LFENCE" : +@@ -327,7 +342,8 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) + !boot_cpu_has(X86_FEATURE_SSBD) ? "" : + (default_xen_spec_ctrl & SPEC_CTRL_SSBD) ? " SSBD+" : " SSBD-", + opt_ibpb ? " IBPB" : "", +- opt_l1d_flush ? " L1D_FLUSH" : ""); ++ opt_l1d_flush ? " L1D_FLUSH" : "", ++ opt_md_clear_pv || opt_md_clear_hvm ? " VERW" : ""); + + /* L1TF diagnostics, printed if vulnerable or PV shadowing is in use. */ + if ( cpu_has_bug_l1tf || opt_pv_l1tf_hwdom || opt_pv_l1tf_domu ) +@@ -737,6 +753,107 @@ static __init void l1tf_calculations(uint64_t caps) + : (3ul << (paddr_bits - 2)))); + } + ++/* Calculate whether this CPU is vulnerable to MDS. */ ++static __init void mds_calculations(uint64_t caps) ++{ ++ /* MDS is only known to affect Intel Family 6 processors at this time. */ ++ if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || ++ boot_cpu_data.x86 != 6 ) ++ return; ++ ++ /* Any processor advertising MDS_NO should be not vulnerable to MDS. */ ++ if ( caps & ARCH_CAPS_MDS_NO ) ++ return; ++ ++ switch ( boot_cpu_data.x86_model ) ++ { ++ /* ++ * Core processors since at least Nehalem are vulnerable. ++ */ ++ case 0x1f: /* Auburndale / Havendale */ ++ case 0x1e: /* Nehalem */ ++ case 0x1a: /* Nehalem EP */ ++ case 0x2e: /* Nehalem EX */ ++ case 0x25: /* Westmere */ ++ case 0x2c: /* Westmere EP */ ++ case 0x2f: /* Westmere EX */ ++ case 0x2a: /* SandyBridge */ ++ case 0x2d: /* SandyBridge EP/EX */ ++ case 0x3a: /* IvyBridge */ ++ case 0x3e: /* IvyBridge EP/EX */ ++ case 0x3c: /* Haswell */ ++ case 0x3f: /* Haswell EX/EP */ ++ case 0x45: /* Haswell D */ ++ case 0x46: /* Haswell H */ ++ case 0x3d: /* Broadwell */ ++ case 0x47: /* Broadwell H */ ++ case 0x4f: /* Broadwell EP/EX */ ++ case 0x56: /* Broadwell D */ ++ case 0x4e: /* Skylake M */ ++ case 0x5e: /* Skylake D */ ++ cpu_has_bug_mds = true; ++ break; ++ ++ /* ++ * Some Core processors have per-stepping vulnerability. 
++ */ ++ case 0x55: /* Skylake-X / Cascade Lake */ ++ if ( boot_cpu_data.x86_mask <= 5 ) ++ cpu_has_bug_mds = true; ++ break; ++ ++ case 0x8e: /* Kaby / Coffee / Whiskey Lake M */ ++ if ( boot_cpu_data.x86_mask <= 0xb ) ++ cpu_has_bug_mds = true; ++ break; ++ ++ case 0x9e: /* Kaby / Coffee / Whiskey Lake D */ ++ if ( boot_cpu_data.x86_mask <= 0xc ) ++ cpu_has_bug_mds = true; ++ break; ++ ++ /* ++ * Very old and very new Atom processors are not vulnerable. ++ */ ++ case 0x1c: /* Pineview */ ++ case 0x26: /* Lincroft */ ++ case 0x27: /* Penwell */ ++ case 0x35: /* Cloverview */ ++ case 0x36: /* Cedarview */ ++ case 0x7a: /* Goldmont */ ++ break; ++ ++ /* ++ * Middling Atom processors are vulnerable to just the Store Buffer ++ * aspect. ++ */ ++ case 0x37: /* Baytrail / Valleyview (Silvermont) */ ++ case 0x4a: /* Merrifield */ ++ case 0x4c: /* Cherrytrail / Brasswell */ ++ case 0x4d: /* Avaton / Rangely (Silvermont) */ ++ case 0x5a: /* Moorefield */ ++ case 0x5d: ++ case 0x65: ++ case 0x6e: ++ case 0x75: ++ /* ++ * Knights processors (which are based on the Silvermont/Airmont ++ * microarchitecture) are similarly only affected by the Store Buffer ++ * aspect. ++ */ ++ case 0x57: /* Knights Landing */ ++ case 0x85: /* Knights Mill */ ++ cpu_has_bug_msbds_only = true; ++ break; ++ ++ default: ++ printk("Unrecognised CPU model %#x - assuming vulnerable to MDS\n", ++ boot_cpu_data.x86_model); ++ cpu_has_bug_mds = true; ++ break; ++ } ++} ++ + void __init init_speculation_mitigations(void) + { + enum ind_thunk thunk = THUNK_DEFAULT; +@@ -924,6 +1041,47 @@ void __init init_speculation_mitigations(void) + "enabled. Please assess your configuration and choose an\n" + "explicit 'smt=' setting. See XSA-273.\n"); + ++ mds_calculations(caps); ++ ++ /* ++ * By default, enable PV and HVM mitigations on MDS-vulnerable hardware. ++ * This will only be a token effort for MLPDS/MFBDS when HT is enabled, ++ * but it is somewhat better than nothing. ++ */ ++ if ( opt_md_clear_pv == -1 ) ++ opt_md_clear_pv = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) && ++ boot_cpu_has(X86_FEATURE_MD_CLEAR)); ++ if ( opt_md_clear_hvm == -1 ) ++ opt_md_clear_hvm = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) && ++ boot_cpu_has(X86_FEATURE_MD_CLEAR)); ++ ++ /* ++ * Enable MDS defences as applicable. The PV blocks need using all the ++ * time, and the Idle blocks need using if either PV or HVM defences are ++ * used. ++ * ++ * HVM is more complicated. The MD_CLEAR microcode extends L1D_FLUSH with ++ * equivelent semantics to avoid needing to perform both flushes on the ++ * HVM path. The HVM blocks don't need activating if our hypervisor told ++ * us it was handling L1D_FLUSH, or we are using L1D_FLUSH ourselves. ++ */ ++ if ( opt_md_clear_pv ) ++ setup_force_cpu_cap(X86_FEATURE_SC_VERW_PV); ++ if ( opt_md_clear_pv || opt_md_clear_hvm ) ++ setup_force_cpu_cap(X86_FEATURE_SC_VERW_IDLE); ++ if ( opt_md_clear_hvm && !(caps & ARCH_CAPS_SKIP_L1DFL) && !opt_l1d_flush ) ++ setup_force_cpu_cap(X86_FEATURE_SC_VERW_HVM); ++ ++ /* ++ * Warn the user if they are on MLPDS/MFBDS-vulnerable hardware with HT ++ * active and no explicit SMT choice. ++ */ ++ if ( opt_smt == -1 && cpu_has_bug_mds && hw_smt_enabled ) ++ warning_add( ++ "Booted on MLPDS/MFBDS-vulnerable hardware with SMT/Hyperthreading\n" ++ "enabled. Mitigations will not be fully effective. Please\n" ++ "choose an explicit smt= setting. 
See XSA-297.\n"); ++ + print_details(thunk, caps); + + /* Index: head/sysutils/xen-tools/Makefile =================================================================== --- head/sysutils/xen-tools/Makefile +++ head/sysutils/xen-tools/Makefile @@ -3,7 +3,7 @@ PORTNAME= xen PKGNAMESUFFIX= -tools PORTVERSION= 4.12.0 -PORTREVISION= 0 +PORTREVISION= 1 CATEGORIES= sysutils emulators MASTER_SITES= http://downloads.xenproject.org/release/xen/${PORTVERSION}/ @@ -54,7 +54,21 @@ INSTALL_TARGET= install-tools DOCS_INSTALL_TARGET= install-docs -EXTRA_PATCHES+= ${FILESDIR}/0001-tools-include-propagate-python-interpreter-path.patch:-p1 +EXTRA_PATCHES+= ${PATCHDIR}/0001-tools-include-propagate-python-interpreter-path.patch:-p1 + +# Pre-patches for XSA-297 to apply cleanly +EXTRA_PATCHES+= ${PATCHDIR}/0001-x86-tsx-Implement-controls-for-RTM-force-abort-mode.patch:-p1 \ + ${PATCHDIR}/0001-x86-msr-Shorten-ARCH_CAPABILITIES_-constants.patch:-p1 \ + ${PATCHDIR}/0002-x86-spec-ctrl-Extend-repoline-safey-calcuations-for-.patch:-p1 + +# Fixes for XSA-297 (MDS) +EXTRA_PATCHES+= ${PATCHDIR}/xsa297-4.12-1.patch:-p1 \ + ${PATCHDIR}/xsa297-4.12-2.patch:-p1 \ + ${PATCHDIR}/xsa297-4.12-3.patch:-p1 \ + ${PATCHDIR}/xsa297-4.12-4.patch:-p1 \ + ${PATCHDIR}/xsa297-4.12-5.patch:-p1 \ + ${PATCHDIR}/xsa297-4.12-6.patch:-p1 \ + ${PATCHDIR}/xsa297-4.12-7.patch:-p1 .include Index: head/sysutils/xen-tools/files/0001-x86-msr-Shorten-ARCH_CAPABILITIES_-constants.patch =================================================================== --- head/sysutils/xen-tools/files/0001-x86-msr-Shorten-ARCH_CAPABILITIES_-constants.patch +++ head/sysutils/xen-tools/files/0001-x86-msr-Shorten-ARCH_CAPABILITIES_-constants.patch @@ -0,0 +1,71 @@ +From e25d1338e1d8a32e46c808321323c4ad8fc5ba01 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Fri, 3 May 2019 10:44:58 +0200 +Subject: [PATCH 1/2] x86/msr: Shorten ARCH_CAPABILITIES_* constants + +They are unnecesserily verbose, and ARCH_CAPS_* is already the more common +version. + +Signed-off-by: Andrew Cooper +Acked-by: Jan Beulich +master commit: ba27aaa88548c824a47dcf5609288ee1c05d2946 +master date: 2019-03-18 16:26:40 +0000 +--- + xen/arch/x86/spec_ctrl.c | 10 +++++----- + xen/include/asm-x86/msr-index.h | 4 ++-- + 2 files changed, 7 insertions(+), 7 deletions(-) + +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index ad72ecd3a5..22bfc5a5e8 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -230,8 +230,8 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) + (_7d0 & cpufeat_mask(X86_FEATURE_L1D_FLUSH)) ? " L1D_FLUSH" : "", + (_7d0 & cpufeat_mask(X86_FEATURE_SSBD)) ? " SSBD" : "", + (e8b & cpufeat_mask(X86_FEATURE_IBPB)) ? " IBPB" : "", +- (caps & ARCH_CAPABILITIES_IBRS_ALL) ? " IBRS_ALL" : "", +- (caps & ARCH_CAPABILITIES_RDCL_NO) ? " RDCL_NO" : "", ++ (caps & ARCH_CAPS_IBRS_ALL) ? " IBRS_ALL" : "", ++ (caps & ARCH_CAPS_RDCL_NO) ? " RDCL_NO" : "", + (caps & ARCH_CAPS_RSBA) ? " RSBA" : "", + (caps & ARCH_CAPS_SKIP_L1DFL) ? " SKIP_L1DFL": "", + (caps & ARCH_CAPS_SSB_NO) ? " SSB_NO" : ""); +@@ -549,7 +549,7 @@ static __init void l1tf_calculations(uint64_t caps) + } + + /* Any processor advertising RDCL_NO should be not vulnerable to L1TF. 
*/ +- if ( caps & ARCH_CAPABILITIES_RDCL_NO ) ++ if ( caps & ARCH_CAPS_RDCL_NO ) + cpu_has_bug_l1tf = false; + + if ( cpu_has_bug_l1tf && hit_default ) +@@ -613,9 +613,9 @@ int8_t __read_mostly opt_xpti_domu = -1; + static __init void xpti_init_default(uint64_t caps) + { + if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD ) +- caps = ARCH_CAPABILITIES_RDCL_NO; ++ caps = ARCH_CAPS_RDCL_NO; + +- if ( caps & ARCH_CAPABILITIES_RDCL_NO ) ++ if ( caps & ARCH_CAPS_RDCL_NO ) + { + if ( opt_xpti_hwdom < 0 ) + opt_xpti_hwdom = 0; +diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h +index c6e1d8768f..11512d4250 100644 +--- a/xen/include/asm-x86/msr-index.h ++++ b/xen/include/asm-x86/msr-index.h +@@ -42,8 +42,8 @@ + #define PRED_CMD_IBPB (_AC(1, ULL) << 0) + + #define MSR_ARCH_CAPABILITIES 0x0000010a +-#define ARCH_CAPABILITIES_RDCL_NO (_AC(1, ULL) << 0) +-#define ARCH_CAPABILITIES_IBRS_ALL (_AC(1, ULL) << 1) ++#define ARCH_CAPS_RDCL_NO (_AC(1, ULL) << 0) ++#define ARCH_CAPS_IBRS_ALL (_AC(1, ULL) << 1) + #define ARCH_CAPS_RSBA (_AC(1, ULL) << 2) + #define ARCH_CAPS_SKIP_L1DFL (_AC(1, ULL) << 3) + #define ARCH_CAPS_SSB_NO (_AC(1, ULL) << 4) +-- +2.17.2 (Apple Git-113) + Index: head/sysutils/xen-tools/files/0001-x86-tsx-Implement-controls-for-RTM-force-abort-mode.patch =================================================================== --- head/sysutils/xen-tools/files/0001-x86-tsx-Implement-controls-for-RTM-force-abort-mode.patch +++ head/sysutils/xen-tools/files/0001-x86-tsx-Implement-controls-for-RTM-force-abort-mode.patch @@ -0,0 +1,185 @@ +From e3a1ebee98e61dfe3b8b1eb71043ad5220b3cfd3 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Fri, 3 May 2019 10:39:29 +0200 +Subject: [PATCH] x86/tsx: Implement controls for RTM force-abort mode + +The CPUID bit and MSR are deliberately not exposed to guests, because they +won't exist on newer processors. As vPMU isn't security supported, the +misbehaviour of PCR3 isn't expected to impact production deployments. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich +master commit: 6be613f29b4205349275d24367bd4c82fb2960dd +master date: 2019-03-12 17:05:21 +0000 +--- + docs/misc/xen-command-line.pandoc | 17 ++++++++++++++++- + tools/misc/xen-cpuid.c | 2 ++ + xen/arch/x86/cpu/intel.c | 3 +++ + xen/arch/x86/cpu/vpmu.c | 3 +++ + xen/arch/x86/msr.c | 4 ++++ + xen/include/asm-x86/cpufeature.h | 3 +++ + xen/include/asm-x86/msr-index.h | 3 +++ + xen/include/asm-x86/vpmu.h | 1 + + xen/include/public/arch-x86/cpufeatureset.h | 1 + + 9 files changed, 36 insertions(+), 1 deletion(-) + +diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc +index 742555616d..6db82f302e 100644 +--- a/docs/misc/xen-command-line.pandoc ++++ b/docs/misc/xen-command-line.pandoc +@@ -2109,7 +2109,7 @@ Use Virtual Processor ID support if available. This prevents the need for TLB + flushes on VM entry and exit, increasing performance. + + ### vpmu (x86) +- = List of [ , bts, ipc, arch ] ++ = List of [ , bts, ipc, arch, rtm-abort= ] + + Applicability: x86. Default: false + +@@ -2142,6 +2142,21 @@ provide access to a wealth of low level processor information. + + * The `arch` option allows access to the pre-defined architectural events. + ++* The `rtm-abort` boolean controls a trade-off between working Restricted ++ Transactional Memory, and working performance counters. ++ ++ All processors released to date (Q1 2019) supporting Transactional Memory ++ Extensions suffer an erratum which has been addressed in microcode. 
++ ++ Processors based on the Skylake microarchitecture with up-to-date ++ microcode internally use performance counter 3 to work around the erratum. ++ A consequence is that the counter gets reprogrammed whenever an `XBEGIN` ++ instruction is executed. ++ ++ An alternative mode exists where PCR3 behaves as before, at the cost of ++ `XBEGIN` unconditionally aborting. Enabling `rtm-abort` mode will ++ activate this alternative mode. ++ + *Warning:* + As the virtualisation is not 100% safe, don't use the vpmu flag on + production systems (see http://xenbits.xen.org/xsa/advisory-163.html)! +diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c +index 6e7ca8b9a4..d87a72e3e6 100644 +--- a/tools/misc/xen-cpuid.c ++++ b/tools/misc/xen-cpuid.c +@@ -146,6 +146,8 @@ static const char *str_7d0[32] = + { + [ 2] = "avx512_4vnniw", [ 3] = "avx512_4fmaps", + ++ /* 12 */ [13] = "tsx-force-abort", ++ + [26] = "ibrsb", [27] = "stibp", + [28] = "l1d_flush", [29] = "arch_caps", + /* 30 */ [31] = "ssbd", +diff --git a/xen/arch/x86/cpu/intel.c b/xen/arch/x86/cpu/intel.c +index 65fa3d611f..29c6b87512 100644 +--- a/xen/arch/x86/cpu/intel.c ++++ b/xen/arch/x86/cpu/intel.c +@@ -286,6 +286,9 @@ static void Intel_errata_workarounds(struct cpuinfo_x86 *c) + if (c->x86 == 6 && cpu_has_clflush && + (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47)) + __set_bit(X86_FEATURE_CLFLUSH_MONITOR, c->x86_capability); ++ ++ if (cpu_has_tsx_force_abort && opt_rtm_abort) ++ wrmsrl(MSR_TSX_FORCE_ABORT, TSX_FORCE_ABORT_RTM); + } + + +diff --git a/xen/arch/x86/cpu/vpmu.c b/xen/arch/x86/cpu/vpmu.c +index 8324d62f11..8f6daf13fd 100644 +--- a/xen/arch/x86/cpu/vpmu.c ++++ b/xen/arch/x86/cpu/vpmu.c +@@ -45,6 +45,7 @@ CHECK_pmu_params; + static unsigned int __read_mostly opt_vpmu_enabled; + unsigned int __read_mostly vpmu_mode = XENPMU_MODE_OFF; + unsigned int __read_mostly vpmu_features = 0; ++bool __read_mostly opt_rtm_abort; + + static DEFINE_SPINLOCK(vpmu_lock); + static unsigned vpmu_count; +@@ -73,6 +74,8 @@ static int __init parse_vpmu_params(const char *s) + vpmu_features |= XENPMU_FEATURE_IPC_ONLY; + else if ( !cmdline_strcmp(s, "arch") ) + vpmu_features |= XENPMU_FEATURE_ARCH_ONLY; ++ else if ( (val = parse_boolean("rtm-abort", s, ss)) >= 0 ) ++ opt_rtm_abort = val; + else + rc = -EINVAL; + +diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c +index 9bb38b6d66..4df4a59f4d 100644 +--- a/xen/arch/x86/msr.c ++++ b/xen/arch/x86/msr.c +@@ -131,6 +131,8 @@ int guest_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val) + case MSR_PRED_CMD: + case MSR_FLUSH_CMD: + /* Write-only */ ++ case MSR_TSX_FORCE_ABORT: ++ /* Not offered to guests. */ + goto gp_fault; + + case MSR_SPEC_CTRL: +@@ -230,6 +232,8 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val) + case MSR_INTEL_PLATFORM_INFO: + case MSR_ARCH_CAPABILITIES: + /* Read-only */ ++ case MSR_TSX_FORCE_ABORT: ++ /* Not offered to guests. */ + goto gp_fault; + + case MSR_AMD_PATCHLOADER: +diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h +index 1fb9af4b19..745801f3c0 100644 +--- a/xen/include/asm-x86/cpufeature.h ++++ b/xen/include/asm-x86/cpufeature.h +@@ -112,6 +112,9 @@ + /* CPUID level 0x80000007.edx */ + #define cpu_has_itsc boot_cpu_has(X86_FEATURE_ITSC) + ++/* CPUID level 0x00000007:0.edx */ ++#define cpu_has_tsx_force_abort boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT) ++ + /* Synthesized. 
*/ + #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) + #define cpu_has_cpuid_faulting boot_cpu_has(X86_FEATURE_CPUID_FAULTING) +diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h +index 24d783a72d..c6e1d8768f 100644 +--- a/xen/include/asm-x86/msr-index.h ++++ b/xen/include/asm-x86/msr-index.h +@@ -51,6 +51,9 @@ + #define MSR_FLUSH_CMD 0x0000010b + #define FLUSH_CMD_L1D (_AC(1, ULL) << 0) + ++#define MSR_TSX_FORCE_ABORT 0x0000010f ++#define TSX_FORCE_ABORT_RTM (_AC(1, ULL) << 0) ++ + /* Intel MSRs. Some also available on other CPUs */ + #define MSR_IA32_PERFCTR0 0x000000c1 + #define MSR_IA32_A_PERFCTR0 0x000004c1 +diff --git a/xen/include/asm-x86/vpmu.h b/xen/include/asm-x86/vpmu.h +index 5e778ab7ba..1287b9fb6e 100644 +--- a/xen/include/asm-x86/vpmu.h ++++ b/xen/include/asm-x86/vpmu.h +@@ -125,6 +125,7 @@ static inline int vpmu_do_rdmsr(unsigned int msr, uint64_t *msr_content) + + extern unsigned int vpmu_mode; + extern unsigned int vpmu_features; ++extern bool opt_rtm_abort; + + /* Context switch */ + static inline void vpmu_switch_from(struct vcpu *prev) +diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h +index fbc68fa29f..2bcc5487ac 100644 +--- a/xen/include/public/arch-x86/cpufeatureset.h ++++ b/xen/include/public/arch-x86/cpufeatureset.h +@@ -242,6 +242,7 @@ XEN_CPUFEATURE(IBPB, 8*32+12) /*A IBPB support only (no IBRS, used by + /* Intel-defined CPU features, CPUID level 0x00000007:0.edx, word 9 */ + XEN_CPUFEATURE(AVX512_4VNNIW, 9*32+ 2) /*A AVX512 Neural Network Instructions */ + XEN_CPUFEATURE(AVX512_4FMAPS, 9*32+ 3) /*A AVX512 Multiply Accumulation Single Precision */ ++XEN_CPUFEATURE(TSX_FORCE_ABORT, 9*32+13) /* MSR_TSX_FORCE_ABORT.RTM_ABORT */ + XEN_CPUFEATURE(IBRSB, 9*32+26) /*A IBRS and IBPB support (used by Intel) */ + XEN_CPUFEATURE(STIBP, 9*32+27) /*A STIBP */ + XEN_CPUFEATURE(L1D_FLUSH, 9*32+28) /*S MSR_FLUSH_CMD and L1D flush. */ +-- +2.17.2 (Apple Git-113) + Index: head/sysutils/xen-tools/files/0002-x86-spec-ctrl-Extend-repoline-safey-calcuations-for-.patch =================================================================== --- head/sysutils/xen-tools/files/0002-x86-spec-ctrl-Extend-repoline-safey-calcuations-for-.patch +++ head/sysutils/xen-tools/files/0002-x86-spec-ctrl-Extend-repoline-safey-calcuations-for-.patch @@ -0,0 +1,68 @@ +From fd2a34c9655acecaaa1541dd84fc670936303175 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper +Date: Fri, 3 May 2019 10:45:45 +0200 +Subject: [PATCH 2/2] x86/spec-ctrl: Extend repoline safey calcuations for + eIBRS and Atom parts + +All currently-released Atom processors are in practice retpoline-safe, because +they don't fall back to a BTB prediction on RSB underflow. + +However, an additional meaning of Enhanced IRBS is that the processor may not +be retpoline-safe. The Gemini Lake platform, based on the Goldmont Plus +microarchitecture is the first Atom processor to support eIBRS. + +Until Xen gets full eIBRS support, Gemini Lake will still be safe using +regular IBRS. 
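The heart of this change is easier to see out of context: retpoline is trusted only when neither RSBA nor (new with this commit) IBRS_ALL is advertised in MSR_ARCH_CAPABILITIES, and the model switch then returns true only for parts known not to fall back to a BTB prediction on RSB underflow. A minimal standalone sketch of that shape, with the caveat that the helper name and reduced model list are illustrative, not the patch's actual code:

    /* Sketch of the retpoline_safe() decision shape; not a drop-in. */
    #include <stdbool.h>
    #include <stdint.h>

    #define ARCH_CAPS_IBRS_ALL (1ULL << 1)  /* eIBRS supported */
    #define ARCH_CAPS_RSBA     (1ULL << 2)  /* may migrate to an unsafe part */

    static bool sketch_retpoline_safe(uint64_t caps, unsigned int model)
    {
        /* Either bit means retpoline semantics cannot be relied upon. */
        if ( caps & (ARCH_CAPS_RSBA | ARCH_CAPS_IBRS_ALL) )
            return false;

        switch ( model )
        {
        case 0x5c: /* Goldmont */
        case 0x5f: /* Denverton */
            /* Atoms before Goldmont Plus don't use BTB predictions on
             * RSB underflow, so retpoline is safe in practice. */
            return true;

        default:
            /* Unrecognised models are assumed unsafe. */
            return false;
        }
    }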
+ +Signed-off-by: Andrew Cooper +Acked-by: Jan Beulich +master commit: 17f74242ccf0ce6e51c03a5860947865c0ef0dc2 +master date: 2019-03-18 16:26:40 +0000 +--- + xen/arch/x86/spec_ctrl.c | 22 +++++++++++++++++++++- + 1 file changed, 21 insertions(+), 1 deletion(-) + +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index 22bfc5a5e8..1171c02ab1 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -316,8 +316,11 @@ static bool __init retpoline_safe(uint64_t caps) + /* + * RSBA may be set by a hypervisor to indicate that we may move to a + * processor which isn't retpoline-safe. ++ * ++ * Processors offering Enhanced IBRS are not guarenteed to be ++ * repoline-safe. + */ +- if ( caps & ARCH_CAPS_RSBA ) ++ if ( caps & (ARCH_CAPS_RSBA | ARCH_CAPS_IBRS_ALL) ) + return false; + + switch ( boot_cpu_data.x86_model ) +@@ -377,6 +380,23 @@ static bool __init retpoline_safe(uint64_t caps) + case 0x9e: + return false; + ++ /* ++ * Atom processors before Goldmont Plus/Gemini Lake are retpoline-safe. ++ */ ++ case 0x1c: /* Pineview */ ++ case 0x26: /* Lincroft */ ++ case 0x27: /* Penwell */ ++ case 0x35: /* Cloverview */ ++ case 0x36: /* Cedarview */ ++ case 0x37: /* Baytrail / Valleyview (Silvermont) */ ++ case 0x4d: /* Avaton / Rangely (Silvermont) */ ++ case 0x4c: /* Cherrytrail / Brasswell */ ++ case 0x4a: /* Merrifield */ ++ case 0x5a: /* Moorefield */ ++ case 0x5c: /* Goldmont */ ++ case 0x5f: /* Denverton */ ++ return true; ++ + default: + printk("Unrecognised CPU model %#x - assuming not reptpoline safe\n", + boot_cpu_data.x86_model); +-- +2.17.2 (Apple Git-113) + Index: head/sysutils/xen-tools/files/xsa297-4.12-1.patch =================================================================== --- head/sysutils/xen-tools/files/xsa297-4.12-1.patch +++ head/sysutils/xen-tools/files/xsa297-4.12-1.patch @@ -0,0 +1,163 @@ +From: Andrew Cooper +Subject: x86/spec-ctrl: Reposition the XPTI command line parsing logic + +It has ended up in the middle of the mitigation calculation logic. Move it to +be beside the other command line parsing. + +No functional change. + +Signed-off-by: Andrew Cooper +Acked-by: Jan Beulich + +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index 1171c02..99310c8 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -167,6 +167,73 @@ static int __init parse_spec_ctrl(const char *s) + } + custom_param("spec-ctrl", parse_spec_ctrl); + ++int8_t __read_mostly opt_xpti_hwdom = -1; ++int8_t __read_mostly opt_xpti_domu = -1; ++ ++static __init void xpti_init_default(uint64_t caps) ++{ ++ if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD ) ++ caps = ARCH_CAPS_RDCL_NO; ++ ++ if ( caps & ARCH_CAPS_RDCL_NO ) ++ { ++ if ( opt_xpti_hwdom < 0 ) ++ opt_xpti_hwdom = 0; ++ if ( opt_xpti_domu < 0 ) ++ opt_xpti_domu = 0; ++ } ++ else ++ { ++ if ( opt_xpti_hwdom < 0 ) ++ opt_xpti_hwdom = 1; ++ if ( opt_xpti_domu < 0 ) ++ opt_xpti_domu = 1; ++ } ++} ++ ++static __init int parse_xpti(const char *s) ++{ ++ const char *ss; ++ int val, rc = 0; ++ ++ /* Interpret 'xpti' alone in its positive boolean form. 
*/ ++ if ( *s == '\0' ) ++ opt_xpti_hwdom = opt_xpti_domu = 1; ++ ++ do { ++ ss = strchr(s, ','); ++ if ( !ss ) ++ ss = strchr(s, '\0'); ++ ++ switch ( parse_bool(s, ss) ) ++ { ++ case 0: ++ opt_xpti_hwdom = opt_xpti_domu = 0; ++ break; ++ ++ case 1: ++ opt_xpti_hwdom = opt_xpti_domu = 1; ++ break; ++ ++ default: ++ if ( !strcmp(s, "default") ) ++ opt_xpti_hwdom = opt_xpti_domu = -1; ++ else if ( (val = parse_boolean("dom0", s, ss)) >= 0 ) ++ opt_xpti_hwdom = val; ++ else if ( (val = parse_boolean("domu", s, ss)) >= 0 ) ++ opt_xpti_domu = val; ++ else if ( *s ) ++ rc = -EINVAL; ++ break; ++ } ++ ++ s = ss + 1; ++ } while ( *ss ); ++ ++ return rc; ++} ++custom_param("xpti", parse_xpti); ++ + int8_t __read_mostly opt_pv_l1tf_hwdom = -1; + int8_t __read_mostly opt_pv_l1tf_domu = -1; + +@@ -627,73 +694,6 @@ static __init void l1tf_calculations(uint64_t caps) + : (3ul << (paddr_bits - 2)))); + } + +-int8_t __read_mostly opt_xpti_hwdom = -1; +-int8_t __read_mostly opt_xpti_domu = -1; +- +-static __init void xpti_init_default(uint64_t caps) +-{ +- if ( boot_cpu_data.x86_vendor == X86_VENDOR_AMD ) +- caps = ARCH_CAPS_RDCL_NO; +- +- if ( caps & ARCH_CAPS_RDCL_NO ) +- { +- if ( opt_xpti_hwdom < 0 ) +- opt_xpti_hwdom = 0; +- if ( opt_xpti_domu < 0 ) +- opt_xpti_domu = 0; +- } +- else +- { +- if ( opt_xpti_hwdom < 0 ) +- opt_xpti_hwdom = 1; +- if ( opt_xpti_domu < 0 ) +- opt_xpti_domu = 1; +- } +-} +- +-static __init int parse_xpti(const char *s) +-{ +- const char *ss; +- int val, rc = 0; +- +- /* Interpret 'xpti' alone in its positive boolean form. */ +- if ( *s == '\0' ) +- opt_xpti_hwdom = opt_xpti_domu = 1; +- +- do { +- ss = strchr(s, ','); +- if ( !ss ) +- ss = strchr(s, '\0'); +- +- switch ( parse_bool(s, ss) ) +- { +- case 0: +- opt_xpti_hwdom = opt_xpti_domu = 0; +- break; +- +- case 1: +- opt_xpti_hwdom = opt_xpti_domu = 1; +- break; +- +- default: +- if ( !strcmp(s, "default") ) +- opt_xpti_hwdom = opt_xpti_domu = -1; +- else if ( (val = parse_boolean("dom0", s, ss)) >= 0 ) +- opt_xpti_hwdom = val; +- else if ( (val = parse_boolean("domu", s, ss)) >= 0 ) +- opt_xpti_domu = val; +- else if ( *s ) +- rc = -EINVAL; +- break; +- } +- +- s = ss + 1; +- } while ( *ss ); +- +- return rc; +-} +-custom_param("xpti", parse_xpti); +- + void __init init_speculation_mitigations(void) + { + enum ind_thunk thunk = THUNK_DEFAULT; Index: head/sysutils/xen-tools/files/xsa297-4.12-2.patch =================================================================== --- head/sysutils/xen-tools/files/xsa297-4.12-2.patch +++ head/sysutils/xen-tools/files/xsa297-4.12-2.patch @@ -0,0 +1,54 @@ +From: Andrew Cooper +Subject: x86/msr: Definitions for MSR_INTEL_CORE_THREAD_COUNT + +This is a model specific register which details the current configuration +cores and threads in the package. Because of how Hyperthread and Core +configuration works works in firmware, the MSR it is de-facto constant and +will remain unchanged until the next system reset. + +It is a read only MSR (so unilaterally reject writes), but for now retain its +leaky-on-read properties. Further CPUID/MSR work is required before we can +start virtualising a consistent topology to the guest, and retaining the old +behaviour is the safest course of action. 
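Since the constants are new, it is worth spelling out the layout they encode: bits 15:0 of the MSR hold the enumerated thread count for the package and bits 31:16 the core count, so SMT is in effect exactly when the two differ, which is the property the next patch in the series relies on. A self-contained sketch of the decode, with the masks copied from the hunk below and the helper name invented for illustration:

    #include <stdbool.h>
    #include <stdint.h>

    #define MSR_CTC_THREAD_MASK 0x0000ffffULL   /* threads, bits 15:0 */
    #define MSR_CTC_CORE_MASK   0xffff0000ULL   /* cores, bits 31:16 */

    /* True when the firmware has left Hyperthreading enabled. */
    static bool smt_active_from_ctc(uint64_t val)
    {
        unsigned int threads = val & MSR_CTC_THREAD_MASK;
        unsigned int cores   = (val & MSR_CTC_CORE_MASK) >> 16;

        return cores != threads;
    }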
+ +Signed-off-by: Andrew Cooper +Acked-by: Jan Beulich + +diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c +index 4df4a59..a7f67d9 100644 +--- a/xen/arch/x86/msr.c ++++ b/xen/arch/x86/msr.c +@@ -200,6 +200,10 @@ int guest_rdmsr(const struct vcpu *v, uint32_t msr, uint64_t *val) + ARRAY_SIZE(msrs->dr_mask))]; + break; + ++ /* ++ * TODO: Implement when we have better topology representation. ++ case MSR_INTEL_CORE_THREAD_COUNT: ++ */ + default: + return X86EMUL_UNHANDLEABLE; + } +@@ -229,6 +233,7 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val) + { + uint64_t rsvd; + ++ case MSR_INTEL_CORE_THREAD_COUNT: + case MSR_INTEL_PLATFORM_INFO: + case MSR_ARCH_CAPABILITIES: + /* Read-only */ +diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h +index 11512d4..389f95f 100644 +--- a/xen/include/asm-x86/msr-index.h ++++ b/xen/include/asm-x86/msr-index.h +@@ -32,6 +32,10 @@ + #define EFER_KNOWN_MASK (EFER_SCE | EFER_LME | EFER_LMA | EFER_NX | \ + EFER_SVME | EFER_FFXSE) + ++#define MSR_INTEL_CORE_THREAD_COUNT 0x00000035 ++#define MSR_CTC_THREAD_MASK 0x0000ffff ++#define MSR_CTC_CORE_MASK 0xffff0000 ++ + /* Speculation Controls. */ + #define MSR_SPEC_CTRL 0x00000048 + #define SPEC_CTRL_IBRS (_AC(1, ULL) << 0) Index: head/sysutils/xen-tools/files/xsa297-4.12-3.patch =================================================================== --- head/sysutils/xen-tools/files/xsa297-4.12-3.patch +++ head/sysutils/xen-tools/files/xsa297-4.12-3.patch @@ -0,0 +1,109 @@ +From: Andrew Cooper +Subject: x86/boot: Detect the firmware SMT setting correctly on Intel hardware + +While boot_cpu_data.x86_num_siblings is an accurate value to use on AMD +hardware, it isn't on Intel when the user has disabled Hyperthreading in the +firmware. As a result, a user which has chosen to disable HT still gets +nagged on L1TF-vulnerable hardware when they haven't chosen an explicit +smt= setting. + +Make use of the largely-undocumented MSR_INTEL_CORE_THREAD_COUNT which in +practice exists since Nehalem, when booting on real hardware. Fall back to +using the ACPI table APIC IDs. + +While adjusting this logic, fix a latent bug in amd_get_topology(). The +thread count field in CPUID.0x8000001e.ebx is documented as 8 bits wide, +rather than 2 bits wide. + +Signed-off-by: Andrew Cooper +Acked-by: Jan Beulich + +diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c +index c790416..b1debac 100644 +--- a/xen/arch/x86/cpu/amd.c ++++ b/xen/arch/x86/cpu/amd.c +@@ -507,7 +507,7 @@ static void amd_get_topology(struct cpuinfo_x86 *c) + u32 eax, ebx, ecx, edx; + + cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); +- c->x86_num_siblings = ((ebx >> 8) & 0x3) + 1; ++ c->x86_num_siblings = ((ebx >> 8) & 0xff) + 1; + + if (c->x86 < 0x17) + c->compute_unit_id = ebx & 0xFF; +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index 99310c8..e49ab3f 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -368,6 +368,45 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) + #endif + } + ++static bool __init check_smt_enabled(void) ++{ ++ uint64_t val; ++ unsigned int cpu; ++ ++ /* ++ * x86_num_siblings defaults to 1 in the absence of other information, and ++ * is adjusted based on other topology information found in CPUID leaves. ++ * ++ * On AMD hardware, it will be the current SMT configuration. On Intel ++ * hardware, it will represent the maximum capability, rather than the ++ * current configuration. 
++ */ ++ if ( boot_cpu_data.x86_num_siblings < 2 ) ++ return false; ++ ++ /* ++ * Intel Nehalem and later hardware does have an MSR which reports the ++ * current count of cores/threads in the package. ++ * ++ * At the time of writing, it is almost completely undocumented, so isn't ++ * virtualised reliably. ++ */ ++ if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && !cpu_has_hypervisor && ++ !rdmsr_safe(MSR_INTEL_CORE_THREAD_COUNT, val) ) ++ return (MASK_EXTR(val, MSR_CTC_CORE_MASK) != ++ MASK_EXTR(val, MSR_CTC_THREAD_MASK)); ++ ++ /* ++ * Search over the CPUs reported in the ACPI tables. Any whose APIC ID ++ * has a non-zero thread id component indicates that SMT is active. ++ */ ++ for_each_present_cpu ( cpu ) ++ if ( x86_cpu_to_apicid[cpu] & (boot_cpu_data.x86_num_siblings - 1) ) ++ return true; ++ ++ return false; ++} ++ + /* Calculate whether Retpoline is known-safe on this CPU. */ + static bool __init retpoline_safe(uint64_t caps) + { +@@ -697,12 +736,14 @@ static __init void l1tf_calculations(uint64_t caps) + void __init init_speculation_mitigations(void) + { + enum ind_thunk thunk = THUNK_DEFAULT; +- bool use_spec_ctrl = false, ibrs = false; ++ bool use_spec_ctrl = false, ibrs = false, hw_smt_enabled; + uint64_t caps = 0; + + if ( boot_cpu_has(X86_FEATURE_ARCH_CAPS) ) + rdmsrl(MSR_ARCH_CAPABILITIES, caps); + ++ hw_smt_enabled = check_smt_enabled(); ++ + /* + * Has the user specified any custom BTI mitigations? If so, follow their + * instructions exactly and disable all heuristics. +@@ -873,8 +914,7 @@ void __init init_speculation_mitigations(void) + * However, if we are on affected hardware, with HT enabled, and the user + * hasn't explicitly chosen whether to use HT or not, nag them to do so. + */ +- if ( opt_smt == -1 && cpu_has_bug_l1tf && !pv_shim && +- boot_cpu_data.x86_num_siblings > 1 ) ++ if ( opt_smt == -1 && cpu_has_bug_l1tf && !pv_shim && hw_smt_enabled ) + warning_add( + "Booted on L1TF-vulnerable hardware with SMT/Hyperthreading\n" + "enabled. Please assess your configuration and choose an\n" Index: head/sysutils/xen-tools/files/xsa297-4.12-4.patch =================================================================== --- head/sysutils/xen-tools/files/xsa297-4.12-4.patch +++ head/sysutils/xen-tools/files/xsa297-4.12-4.patch @@ -0,0 +1,55 @@ +From: Andrew Cooper +Subject: x86/spec-ctrl: Misc non-functional cleanup + + * Identify BTI in the spec_ctrl_{enter,exit}_idle() comments, as other + mitigations will shortly appear. + * Use alternative_input() and cover the lack of memory cobber with a further + barrier. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich + +diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h +index 779da2b..20ee112 100644 +--- a/xen/include/asm-x86/spec_ctrl.h ++++ b/xen/include/asm-x86/spec_ctrl.h +@@ -68,6 +68,8 @@ static always_inline void spec_ctrl_enter_idle(struct cpu_info *info) + uint32_t val = 0; + + /* ++ * Branch Target Injection: ++ * + * Latch the new shadow value, then enable shadowing, then update the MSR. + * There are no SMP issues here; only local processor ordering concerns. 
+ */ +@@ -75,8 +77,9 @@ static always_inline void spec_ctrl_enter_idle(struct cpu_info *info) + barrier(); + info->spec_ctrl_flags |= SCF_use_shadow; + barrier(); +- asm volatile ( ALTERNATIVE("", "wrmsr", X86_FEATURE_SC_MSR_IDLE) +- :: "a" (val), "c" (MSR_SPEC_CTRL), "d" (0) : "memory" ); ++ alternative_input("", "wrmsr", X86_FEATURE_SC_MSR_IDLE, ++ "a" (val), "c" (MSR_SPEC_CTRL), "d" (0)); ++ barrier(); + } + + /* WARNING! `ret`, `call *`, `jmp *` not safe before this call. */ +@@ -85,13 +88,16 @@ static always_inline void spec_ctrl_exit_idle(struct cpu_info *info) + uint32_t val = info->xen_spec_ctrl; + + /* ++ * Branch Target Injection: ++ * + * Disable shadowing before updating the MSR. There are no SMP issues + * here; only local processor ordering concerns. + */ + info->spec_ctrl_flags &= ~SCF_use_shadow; + barrier(); +- asm volatile ( ALTERNATIVE("", "wrmsr", X86_FEATURE_SC_MSR_IDLE) +- :: "a" (val), "c" (MSR_SPEC_CTRL), "d" (0) : "memory" ); ++ alternative_input("", "wrmsr", X86_FEATURE_SC_MSR_IDLE, ++ "a" (val), "c" (MSR_SPEC_CTRL), "d" (0)); ++ barrier(); + } + + #endif /* __ASSEMBLY__ */ Index: head/sysutils/xen-tools/files/xsa297-4.12-5.patch =================================================================== --- head/sysutils/xen-tools/files/xsa297-4.12-5.patch +++ head/sysutils/xen-tools/files/xsa297-4.12-5.patch @@ -0,0 +1,147 @@ +From: Andrew Cooper +Subject: x86/spec-ctrl: CPUID/MSR definitions for Microarchitectural Data + Sampling + +The MD_CLEAR feature can be automatically offered to guests. No +infrastructure is needed in Xen to support the guest making use of it. + +This is part of XSA-297, CVE-2018-12126, CVE-2018-12127, CVE-2018-12130, CVE-2019-11091. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich + +diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc +index 6db82f3..f80d8d8 100644 +--- a/docs/misc/xen-command-line.pandoc ++++ b/docs/misc/xen-command-line.pandoc +@@ -483,7 +483,7 @@ accounting for hardware capabilities as enumerated via CPUID. + + Currently accepted: + +-The Speculation Control hardware features `ibrsb`, `stibp`, `ibpb`, ++The Speculation Control hardware features `md-clear`, `ibrsb`, `stibp`, `ibpb`, + `l1d-flush` and `ssbd` are used by default if available and applicable. They can + be ignored, e.g. `no-ibrsb`, at which point Xen won't use them itself, and + won't offer them to guests. 
+diff --git a/tools/libxl/libxl_cpuid.c b/tools/libxl/libxl_cpuid.c +index 52e16c2..5a1702d 100644 +--- a/tools/libxl/libxl_cpuid.c ++++ b/tools/libxl/libxl_cpuid.c +@@ -202,6 +202,7 @@ int libxl_cpuid_parse_config(libxl_cpuid_policy_list *cpuid, const char* str) + + {"avx512-4vnniw",0x00000007, 0, CPUID_REG_EDX, 2, 1}, + {"avx512-4fmaps",0x00000007, 0, CPUID_REG_EDX, 3, 1}, ++ {"md-clear", 0x00000007, 0, CPUID_REG_EDX, 10, 1}, + {"ibrsb", 0x00000007, 0, CPUID_REG_EDX, 26, 1}, + {"stibp", 0x00000007, 0, CPUID_REG_EDX, 27, 1}, + {"l1d-flush", 0x00000007, 0, CPUID_REG_EDX, 28, 1}, +diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c +index d87a72e..f67ecd3 100644 +--- a/tools/misc/xen-cpuid.c ++++ b/tools/misc/xen-cpuid.c +@@ -146,6 +146,7 @@ static const char *str_7d0[32] = + { + [ 2] = "avx512_4vnniw", [ 3] = "avx512_4fmaps", + ++ [10] = "md-clear", + /* 12 */ [13] = "tsx-force-abort", + + [26] = "ibrsb", [27] = "stibp", +diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c +index ab0aab6..3efad9c 100644 +--- a/xen/arch/x86/cpuid.c ++++ b/xen/arch/x86/cpuid.c +@@ -29,7 +29,12 @@ static int __init parse_xen_cpuid(const char *s) + if ( !ss ) + ss = strchr(s, '\0'); + +- if ( (val = parse_boolean("ibpb", s, ss)) >= 0 ) ++ if ( (val = parse_boolean("md-clear", s, ss)) >= 0 ) ++ { ++ if ( !val ) ++ setup_clear_cpu_cap(X86_FEATURE_MD_CLEAR); ++ } ++ else if ( (val = parse_boolean("ibpb", s, ss)) >= 0 ) + { + if ( !val ) + setup_clear_cpu_cap(X86_FEATURE_IBPB); +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index e49ab3f..a573b02 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -291,17 +291,19 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) + printk("Speculative mitigation facilities:\n"); + + /* Hardware features which pertain to speculative mitigations. */ +- printk(" Hardware features:%s%s%s%s%s%s%s%s%s%s\n", ++ printk(" Hardware features:%s%s%s%s%s%s%s%s%s%s%s%s\n", + (_7d0 & cpufeat_mask(X86_FEATURE_IBRSB)) ? " IBRS/IBPB" : "", + (_7d0 & cpufeat_mask(X86_FEATURE_STIBP)) ? " STIBP" : "", + (_7d0 & cpufeat_mask(X86_FEATURE_L1D_FLUSH)) ? " L1D_FLUSH" : "", + (_7d0 & cpufeat_mask(X86_FEATURE_SSBD)) ? " SSBD" : "", ++ (_7d0 & cpufeat_mask(X86_FEATURE_MD_CLEAR)) ? " MD_CLEAR" : "", + (e8b & cpufeat_mask(X86_FEATURE_IBPB)) ? " IBPB" : "", + (caps & ARCH_CAPS_IBRS_ALL) ? " IBRS_ALL" : "", + (caps & ARCH_CAPS_RDCL_NO) ? " RDCL_NO" : "", + (caps & ARCH_CAPS_RSBA) ? " RSBA" : "", + (caps & ARCH_CAPS_SKIP_L1DFL) ? " SKIP_L1DFL": "", +- (caps & ARCH_CAPS_SSB_NO) ? " SSB_NO" : ""); ++ (caps & ARCH_CAPS_SSB_NO) ? " SSB_NO" : "", ++ (caps & ARCH_CAPS_MDS_NO) ? " MDS_NO" : ""); + + /* Compiled-in support which pertains to mitigations. */ + if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) || IS_ENABLED(CONFIG_SHADOW_PAGING) ) +@@ -339,23 +341,25 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) + * mitigation support for guests. + */ + #ifdef CONFIG_HVM +- printk(" Support for HVM VMs:%s%s%s%s\n", ++ printk(" Support for HVM VMs:%s%s%s%s%s\n", + (boot_cpu_has(X86_FEATURE_SC_MSR_HVM) || + boot_cpu_has(X86_FEATURE_SC_RSB_HVM) || + opt_eager_fpu) ? "" : " None", + boot_cpu_has(X86_FEATURE_SC_MSR_HVM) ? " MSR_SPEC_CTRL" : "", + boot_cpu_has(X86_FEATURE_SC_RSB_HVM) ? " RSB" : "", +- opt_eager_fpu ? " EAGER_FPU" : ""); ++ opt_eager_fpu ? " EAGER_FPU" : "", ++ boot_cpu_has(X86_FEATURE_MD_CLEAR) ? 
" MD_CLEAR" : ""); + + #endif + #ifdef CONFIG_PV +- printk(" Support for PV VMs:%s%s%s%s\n", ++ printk(" Support for PV VMs:%s%s%s%s%s\n", + (boot_cpu_has(X86_FEATURE_SC_MSR_PV) || + boot_cpu_has(X86_FEATURE_SC_RSB_PV) || + opt_eager_fpu) ? "" : " None", + boot_cpu_has(X86_FEATURE_SC_MSR_PV) ? " MSR_SPEC_CTRL" : "", + boot_cpu_has(X86_FEATURE_SC_RSB_PV) ? " RSB" : "", +- opt_eager_fpu ? " EAGER_FPU" : ""); ++ opt_eager_fpu ? " EAGER_FPU" : "", ++ boot_cpu_has(X86_FEATURE_MD_CLEAR) ? " MD_CLEAR" : ""); + + printk(" XPTI (64-bit PV only): Dom0 %s, DomU %s (with%s PCID)\n", + opt_xpti_hwdom ? "enabled" : "disabled", +diff --git a/xen/include/asm-x86/msr-index.h b/xen/include/asm-x86/msr-index.h +index 389f95f..637259b 100644 +--- a/xen/include/asm-x86/msr-index.h ++++ b/xen/include/asm-x86/msr-index.h +@@ -51,6 +51,7 @@ + #define ARCH_CAPS_RSBA (_AC(1, ULL) << 2) + #define ARCH_CAPS_SKIP_L1DFL (_AC(1, ULL) << 3) + #define ARCH_CAPS_SSB_NO (_AC(1, ULL) << 4) ++#define ARCH_CAPS_MDS_NO (_AC(1, ULL) << 5) + + #define MSR_FLUSH_CMD 0x0000010b + #define FLUSH_CMD_L1D (_AC(1, ULL) << 0) +diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h +index 2bcc548..55231d4 100644 +--- a/xen/include/public/arch-x86/cpufeatureset.h ++++ b/xen/include/public/arch-x86/cpufeatureset.h +@@ -242,6 +242,7 @@ XEN_CPUFEATURE(IBPB, 8*32+12) /*A IBPB support only (no IBRS, used by + /* Intel-defined CPU features, CPUID level 0x00000007:0.edx, word 9 */ + XEN_CPUFEATURE(AVX512_4VNNIW, 9*32+ 2) /*A AVX512 Neural Network Instructions */ + XEN_CPUFEATURE(AVX512_4FMAPS, 9*32+ 3) /*A AVX512 Multiply Accumulation Single Precision */ ++XEN_CPUFEATURE(MD_CLEAR, 9*32+10) /*A VERW clears microarchitectural buffers */ + XEN_CPUFEATURE(TSX_FORCE_ABORT, 9*32+13) /* MSR_TSX_FORCE_ABORT.RTM_ABORT */ + XEN_CPUFEATURE(IBRSB, 9*32+26) /*A IBRS and IBPB support (used by Intel) */ + XEN_CPUFEATURE(STIBP, 9*32+27) /*A STIBP */ Index: head/sysutils/xen-tools/files/xsa297-4.12-6.patch =================================================================== --- head/sysutils/xen-tools/files/xsa297-4.12-6.patch +++ head/sysutils/xen-tools/files/xsa297-4.12-6.patch @@ -0,0 +1,134 @@ +From: Andrew Cooper +Subject: x86/spec-ctrl: Infrastructure to use VERW to flush pipeline buffers + +Three synthetic features are introduced, as we need individual control of +each, depending on circumstances. A later change will enable them at +appropriate points. + +The verw_sel field doesn't strictly need to live in struct cpu_info. It lives +there because there is a convenient hole it can fill, and it reduces the +complexity of the SPEC_CTRL_EXIT_TO_{PV,HVM} assembly by avoiding the need for +any temporary stack maintenance. + +This is part of XSA-297, CVE-2018-12126, CVE-2018-12127, CVE-2018-12130, CVE-2019-11091. 
+ +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich + +diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c +index 052228c..33930ce 100644 +--- a/xen/arch/x86/x86_64/asm-offsets.c ++++ b/xen/arch/x86/x86_64/asm-offsets.c +@@ -110,6 +110,7 @@ void __dummy__(void) + BLANK(); + + OFFSET(CPUINFO_guest_cpu_user_regs, struct cpu_info, guest_cpu_user_regs); ++ OFFSET(CPUINFO_verw_sel, struct cpu_info, verw_sel); + OFFSET(CPUINFO_current_vcpu, struct cpu_info, current_vcpu); + OFFSET(CPUINFO_cr4, struct cpu_info, cr4); + OFFSET(CPUINFO_xen_cr3, struct cpu_info, xen_cr3); +diff --git a/xen/include/asm-x86/cpufeatures.h b/xen/include/asm-x86/cpufeatures.h +index 0c06274..ba55245 100644 +--- a/xen/include/asm-x86/cpufeatures.h ++++ b/xen/include/asm-x86/cpufeatures.h +@@ -31,3 +31,6 @@ XEN_CPUFEATURE(SC_RSB_PV, (FSCAPINTS+0)*32+18) /* RSB overwrite needed for + XEN_CPUFEATURE(SC_RSB_HVM, (FSCAPINTS+0)*32+19) /* RSB overwrite needed for HVM */ + XEN_CPUFEATURE(SC_MSR_IDLE, (FSCAPINTS+0)*32+21) /* (SC_MSR_PV || SC_MSR_HVM) && default_xen_spec_ctrl */ + XEN_CPUFEATURE(XEN_LBR, (FSCAPINTS+0)*32+22) /* Xen uses MSR_DEBUGCTL.LBR */ ++XEN_CPUFEATURE(SC_VERW_PV, (FSCAPINTS+0)*32+23) /* VERW used by Xen for PV */ ++XEN_CPUFEATURE(SC_VERW_HVM, (FSCAPINTS+0)*32+24) /* VERW used by Xen for HVM */ ++XEN_CPUFEATURE(SC_VERW_IDLE, (FSCAPINTS+0)*32+25) /* VERW used by Xen for idle */ +diff --git a/xen/include/asm-x86/current.h b/xen/include/asm-x86/current.h +index 5bd64b2..f3508c3 100644 +--- a/xen/include/asm-x86/current.h ++++ b/xen/include/asm-x86/current.h +@@ -38,6 +38,7 @@ struct vcpu; + struct cpu_info { + struct cpu_user_regs guest_cpu_user_regs; + unsigned int processor_id; ++ unsigned int verw_sel; + struct vcpu *current_vcpu; + unsigned long per_cpu_offset; + unsigned long cr4; +diff --git a/xen/include/asm-x86/spec_ctrl.h b/xen/include/asm-x86/spec_ctrl.h +index 20ee112..ba03bb4 100644 +--- a/xen/include/asm-x86/spec_ctrl.h ++++ b/xen/include/asm-x86/spec_ctrl.h +@@ -60,6 +60,13 @@ static inline void init_shadow_spec_ctrl_state(void) + info->shadow_spec_ctrl = 0; + info->xen_spec_ctrl = default_xen_spec_ctrl; + info->spec_ctrl_flags = default_spec_ctrl_flags; ++ ++ /* ++ * For least latency, the VERW selector should be a writeable data ++ * descriptor resident in the cache. __HYPERVISOR_DS32 shares a cache ++ * line with __HYPERVISOR_CS, so is expected to be very cache-hot. ++ */ ++ info->verw_sel = __HYPERVISOR_DS32; + } + + /* WARNING! `ret`, `call *`, `jmp *` not safe after this call. */ +@@ -80,6 +87,22 @@ static always_inline void spec_ctrl_enter_idle(struct cpu_info *info) + alternative_input("", "wrmsr", X86_FEATURE_SC_MSR_IDLE, + "a" (val), "c" (MSR_SPEC_CTRL), "d" (0)); + barrier(); ++ ++ /* ++ * Microarchitectural Store Buffer Data Sampling: ++ * ++ * On vulnerable systems, store buffer entries are statically partitioned ++ * between active threads. When entering idle, our store buffer entries ++ * are re-partitioned to allow the other threads to use them. ++ * ++ * Flush the buffers to ensure that no sensitive data of ours can be ++ * leaked by a sibling after it gets our store buffer entries. ++ * ++ * Note: VERW must be encoded with a memory operand, as it is only that ++ * form which causes a flush. ++ */ ++ alternative_input("", "verw %[sel]", X86_FEATURE_SC_VERW_IDLE, ++ [sel] "m" (info->verw_sel)); + } + + /* WARNING! `ret`, `call *`, `jmp *` not safe before this call. 
*/ +@@ -98,6 +121,17 @@ static always_inline void spec_ctrl_exit_idle(struct cpu_info *info) + alternative_input("", "wrmsr", X86_FEATURE_SC_MSR_IDLE, + "a" (val), "c" (MSR_SPEC_CTRL), "d" (0)); + barrier(); ++ ++ /* ++ * Microarchitectural Store Buffer Data Sampling: ++ * ++ * On vulnerable systems, store buffer entries are statically partitioned ++ * between active threads. When exiting idle, the other threads store ++ * buffer entries are re-partitioned to give us some. ++ * ++ * We now have store buffer entries with stale data from sibling threads. ++ * A flush if necessary will be performed on the return to guest path. ++ */ + } + + #endif /* __ASSEMBLY__ */ +diff --git a/xen/include/asm-x86/spec_ctrl_asm.h b/xen/include/asm-x86/spec_ctrl_asm.h +index 803f7ce..c60093b 100644 +--- a/xen/include/asm-x86/spec_ctrl_asm.h ++++ b/xen/include/asm-x86/spec_ctrl_asm.h +@@ -241,12 +241,16 @@ + /* Use when exiting to PV guest context. */ + #define SPEC_CTRL_EXIT_TO_PV \ + ALTERNATIVE "", \ +- DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV ++ DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV; \ ++ ALTERNATIVE "", __stringify(verw CPUINFO_verw_sel(%rsp)), \ ++ X86_FEATURE_SC_VERW_PV + + /* Use when exiting to HVM guest context. */ + #define SPEC_CTRL_EXIT_TO_HVM \ + ALTERNATIVE "", \ +- DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_HVM ++ DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_HVM; \ ++ ALTERNATIVE "", __stringify(verw CPUINFO_verw_sel(%rsp)), \ ++ X86_FEATURE_SC_VERW_HVM + + /* + * Use in IST interrupt/exception context. May interrupt Xen or PV context. Index: head/sysutils/xen-tools/files/xsa297-4.12-7.patch =================================================================== --- head/sysutils/xen-tools/files/xsa297-4.12-7.patch +++ head/sysutils/xen-tools/files/xsa297-4.12-7.patch @@ -0,0 +1,316 @@ +From: Andrew Cooper +Subject: x86/spec-ctrl: Introduce options to control VERW flushing + +The Microarchitectural Data Sampling vulnerability is split into categories +with subtly different properties: + + MLPDS - Microarchitectural Load Port Data Sampling + MSBDS - Microarchitectural Store Buffer Data Sampling + MFBDS - Microarchitectural Fill Buffer Data Sampling + MDSUM - Microarchitectural Data Sampling Uncacheable Memory + +MDSUM is a special case of the other three, and isn't distinguished further. + +These issues pertain to three microarchitectural buffers. The Load Ports, the +Store Buffers and the Fill Buffers. Each of these structures are flushed by +the new enhanced VERW functionality, but the conditions under which flushing +is necessary vary. + +For this concise overview of the issues and default logic, the abbreviations +SP (Store Port), FB (Fill Buffer), LP (Load Port) and HT (Hyperthreading) are +used for brevity: + + * Vulnerable hardware is divided into two categories - parts which suffer + from SP only, and parts with any other combination of vulnerabilities. + + * SP only has an HT interaction when the thread goes idle, due to the static + partitioning of resources. LP and FB have HT interactions at all points, + due to the competitive sharing of resources. All issues potentially leak + data across the return-to-guest transition. + + * The microcode which implements VERW flushing also extends MSR_FLUSH_CMD, so + we don't need to do both on the HVM return-to-guest path. However, some + parts are not vulnerable to L1TF (therefore have no MSR_FLUSH_CMD), but are + vulnerable to MDS, so do require VERW on the HVM path. 
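The decision matrix that falls out of these rules is compact enough to quote; this is the logic (minus diff markers) that the init_speculation_mitigations() hunk at the end of this patch adds:

    if ( opt_md_clear_pv )
        setup_force_cpu_cap(X86_FEATURE_SC_VERW_PV);
    if ( opt_md_clear_pv || opt_md_clear_hvm )
        setup_force_cpu_cap(X86_FEATURE_SC_VERW_IDLE);
    if ( opt_md_clear_hvm && !(caps & ARCH_CAPS_SKIP_L1DFL) && !opt_l1d_flush )
        setup_force_cpu_cap(X86_FEATURE_SC_VERW_HVM);

That is: PV exit paths get VERW whenever the PV defence is on, the idle blocks are needed whenever either defence is on, and the HVM exit path only needs its own VERW when neither Xen's own L1D flush nor a hypervisor-provided one already covers it.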
+ +Note that we deliberately support mds=1 even without MD_CLEAR in case the +microcode has been updated but the feature bit not exposed. + +This is part of XSA-297, CVE-2018-12126, CVE-2018-12127, CVE-2018-12130, CVE-2019-11091. + +Signed-off-by: Andrew Cooper +Reviewed-by: Jan Beulich + +diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc +index f80d8d8..85081fd 100644 +--- a/docs/misc/xen-command-line.pandoc ++++ b/docs/misc/xen-command-line.pandoc +@@ -1895,7 +1895,7 @@ not be able to control the state of the mitigation. + By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`). + + ### spec-ctrl (x86) +-> `= List of [ , xen=, {pv,hvm,msr-sc,rsb}=, ++> `= List of [ , xen=, {pv,hvm,msr-sc,rsb,md-clear}=, + > bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,eager-fpu, + > l1d-flush}= ]` + +@@ -1919,9 +1919,10 @@ in place for guests to use. + + Use of a positive boolean value for either of these options is invalid. + +-The booleans `pv=`, `hvm=`, `msr-sc=` and `rsb=` offer fine grained control +-over the alternative blocks used by Xen. These impact Xen's ability to +-protect itself, and Xen's ability to virtualise support for guests to use. ++The booleans `pv=`, `hvm=`, `msr-sc=`, `rsb=` and `md-clear=` offer fine ++grained control over the alternative blocks used by Xen. These impact Xen's ++ability to protect itself, and Xen's ability to virtualise support for guests ++to use. + + * `pv=` and `hvm=` offer control over all suboptions for PV and HVM guests + respectively. +@@ -1930,6 +1931,11 @@ protect itself, and Xen's ability to virtualise support for guests to use. + guests and if disabled, guests will be unable to use IBRS/STIBP/SSBD/etc. + * `rsb=` offers control over whether to overwrite the Return Stack Buffer / + Return Address Stack on entry to Xen. ++* `md-clear=` offers control over whether to use VERW to flush ++ microarchitectural buffers on idle and exit from Xen. *Note: For ++ compatibility with development versions of this fix, `mds=` is also accepted ++ on Xen 4.12 and earlier as an alias. Consult vendor documentation in ++ preference to here.* + + If Xen was compiled with INDIRECT_THUNK support, `bti-thunk=` can be used to + select which of the thunks gets patched into the `__x86_indirect_thunk_%reg` +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index a573b02..0509ac8 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -35,6 +35,8 @@ static bool __initdata opt_msr_sc_pv = true; + static bool __initdata opt_msr_sc_hvm = true; + static bool __initdata opt_rsb_pv = true; + static bool __initdata opt_rsb_hvm = true; ++static int8_t __initdata opt_md_clear_pv = -1; ++static int8_t __initdata opt_md_clear_hvm = -1; + + /* Cmdline controls for Xen's speculative settings. */ + static enum ind_thunk { +@@ -59,6 +61,9 @@ paddr_t __read_mostly l1tf_addr_mask, __read_mostly l1tf_safe_maddr; + static bool __initdata cpu_has_bug_l1tf; + static unsigned int __initdata l1d_maxphysaddr; + ++static bool __initdata cpu_has_bug_msbds_only; /* => minimal HT impact. */ ++static bool __initdata cpu_has_bug_mds; /* Any other M{LP,SB,FB}DS combination. 
*/ ++ + static int __init parse_spec_ctrl(const char *s) + { + const char *ss; +@@ -94,6 +99,8 @@ static int __init parse_spec_ctrl(const char *s) + disable_common: + opt_rsb_pv = false; + opt_rsb_hvm = false; ++ opt_md_clear_pv = 0; ++ opt_md_clear_hvm = 0; + + opt_thunk = THUNK_JMP; + opt_ibrs = 0; +@@ -116,11 +123,13 @@ static int __init parse_spec_ctrl(const char *s) + { + opt_msr_sc_pv = val; + opt_rsb_pv = val; ++ opt_md_clear_pv = val; + } + else if ( (val = parse_boolean("hvm", s, ss)) >= 0 ) + { + opt_msr_sc_hvm = val; + opt_rsb_hvm = val; ++ opt_md_clear_hvm = val; + } + else if ( (val = parse_boolean("msr-sc", s, ss)) >= 0 ) + { +@@ -132,6 +141,12 @@ static int __init parse_spec_ctrl(const char *s) + opt_rsb_pv = val; + opt_rsb_hvm = val; + } ++ else if ( (val = parse_boolean("md-clear", s, ss)) >= 0 || ++ (val = parse_boolean("mds", s, ss)) >= 0 ) ++ { ++ opt_md_clear_pv = val; ++ opt_md_clear_hvm = val; ++ } + + /* Xen's speculative sidechannel mitigation settings. */ + else if ( !strncmp(s, "bti-thunk=", 10) ) +@@ -317,7 +332,7 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) + "\n"); + + /* Settings for Xen's protection, irrespective of guests. */ +- printk(" Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s, Other:%s%s\n", ++ printk(" Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s, Other:%s%s%s\n", + thunk == THUNK_NONE ? "N/A" : + thunk == THUNK_RETPOLINE ? "RETPOLINE" : + thunk == THUNK_LFENCE ? "LFENCE" : +@@ -327,7 +342,8 @@ static void __init print_details(enum ind_thunk thunk, uint64_t caps) + !boot_cpu_has(X86_FEATURE_SSBD) ? "" : + (default_xen_spec_ctrl & SPEC_CTRL_SSBD) ? " SSBD+" : " SSBD-", + opt_ibpb ? " IBPB" : "", +- opt_l1d_flush ? " L1D_FLUSH" : ""); ++ opt_l1d_flush ? " L1D_FLUSH" : "", ++ opt_md_clear_pv || opt_md_clear_hvm ? " VERW" : ""); + + /* L1TF diagnostics, printed if vulnerable or PV shadowing is in use. */ + if ( cpu_has_bug_l1tf || opt_pv_l1tf_hwdom || opt_pv_l1tf_domu ) +@@ -737,6 +753,107 @@ static __init void l1tf_calculations(uint64_t caps) + : (3ul << (paddr_bits - 2)))); + } + ++/* Calculate whether this CPU is vulnerable to MDS. */ ++static __init void mds_calculations(uint64_t caps) ++{ ++ /* MDS is only known to affect Intel Family 6 processors at this time. */ ++ if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || ++ boot_cpu_data.x86 != 6 ) ++ return; ++ ++ /* Any processor advertising MDS_NO should be not vulnerable to MDS. */ ++ if ( caps & ARCH_CAPS_MDS_NO ) ++ return; ++ ++ switch ( boot_cpu_data.x86_model ) ++ { ++ /* ++ * Core processors since at least Nehalem are vulnerable. ++ */ ++ case 0x1f: /* Auburndale / Havendale */ ++ case 0x1e: /* Nehalem */ ++ case 0x1a: /* Nehalem EP */ ++ case 0x2e: /* Nehalem EX */ ++ case 0x25: /* Westmere */ ++ case 0x2c: /* Westmere EP */ ++ case 0x2f: /* Westmere EX */ ++ case 0x2a: /* SandyBridge */ ++ case 0x2d: /* SandyBridge EP/EX */ ++ case 0x3a: /* IvyBridge */ ++ case 0x3e: /* IvyBridge EP/EX */ ++ case 0x3c: /* Haswell */ ++ case 0x3f: /* Haswell EX/EP */ ++ case 0x45: /* Haswell D */ ++ case 0x46: /* Haswell H */ ++ case 0x3d: /* Broadwell */ ++ case 0x47: /* Broadwell H */ ++ case 0x4f: /* Broadwell EP/EX */ ++ case 0x56: /* Broadwell D */ ++ case 0x4e: /* Skylake M */ ++ case 0x5e: /* Skylake D */ ++ cpu_has_bug_mds = true; ++ break; ++ ++ /* ++ * Some Core processors have per-stepping vulnerability. 
++ */ ++ case 0x55: /* Skylake-X / Cascade Lake */ ++ if ( boot_cpu_data.x86_mask <= 5 ) ++ cpu_has_bug_mds = true; ++ break; ++ ++ case 0x8e: /* Kaby / Coffee / Whiskey Lake M */ ++ if ( boot_cpu_data.x86_mask <= 0xb ) ++ cpu_has_bug_mds = true; ++ break; ++ ++ case 0x9e: /* Kaby / Coffee / Whiskey Lake D */ ++ if ( boot_cpu_data.x86_mask <= 0xc ) ++ cpu_has_bug_mds = true; ++ break; ++ ++ /* ++ * Very old and very new Atom processors are not vulnerable. ++ */ ++ case 0x1c: /* Pineview */ ++ case 0x26: /* Lincroft */ ++ case 0x27: /* Penwell */ ++ case 0x35: /* Cloverview */ ++ case 0x36: /* Cedarview */ ++ case 0x7a: /* Goldmont */ ++ break; ++ ++ /* ++ * Middling Atom processors are vulnerable to just the Store Buffer ++ * aspect. ++ */ ++ case 0x37: /* Baytrail / Valleyview (Silvermont) */ ++ case 0x4a: /* Merrifield */ ++ case 0x4c: /* Cherrytrail / Brasswell */ ++ case 0x4d: /* Avaton / Rangely (Silvermont) */ ++ case 0x5a: /* Moorefield */ ++ case 0x5d: ++ case 0x65: ++ case 0x6e: ++ case 0x75: ++ /* ++ * Knights processors (which are based on the Silvermont/Airmont ++ * microarchitecture) are similarly only affected by the Store Buffer ++ * aspect. ++ */ ++ case 0x57: /* Knights Landing */ ++ case 0x85: /* Knights Mill */ ++ cpu_has_bug_msbds_only = true; ++ break; ++ ++ default: ++ printk("Unrecognised CPU model %#x - assuming vulnerable to MDS\n", ++ boot_cpu_data.x86_model); ++ cpu_has_bug_mds = true; ++ break; ++ } ++} ++ + void __init init_speculation_mitigations(void) + { + enum ind_thunk thunk = THUNK_DEFAULT; +@@ -924,6 +1041,47 @@ void __init init_speculation_mitigations(void) + "enabled. Please assess your configuration and choose an\n" + "explicit 'smt=' setting. See XSA-273.\n"); + ++ mds_calculations(caps); ++ ++ /* ++ * By default, enable PV and HVM mitigations on MDS-vulnerable hardware. ++ * This will only be a token effort for MLPDS/MFBDS when HT is enabled, ++ * but it is somewhat better than nothing. ++ */ ++ if ( opt_md_clear_pv == -1 ) ++ opt_md_clear_pv = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) && ++ boot_cpu_has(X86_FEATURE_MD_CLEAR)); ++ if ( opt_md_clear_hvm == -1 ) ++ opt_md_clear_hvm = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) && ++ boot_cpu_has(X86_FEATURE_MD_CLEAR)); ++ ++ /* ++ * Enable MDS defences as applicable. The PV blocks need using all the ++ * time, and the Idle blocks need using if either PV or HVM defences are ++ * used. ++ * ++ * HVM is more complicated. The MD_CLEAR microcode extends L1D_FLUSH with ++ * equivelent semantics to avoid needing to perform both flushes on the ++ * HVM path. The HVM blocks don't need activating if our hypervisor told ++ * us it was handling L1D_FLUSH, or we are using L1D_FLUSH ourselves. ++ */ ++ if ( opt_md_clear_pv ) ++ setup_force_cpu_cap(X86_FEATURE_SC_VERW_PV); ++ if ( opt_md_clear_pv || opt_md_clear_hvm ) ++ setup_force_cpu_cap(X86_FEATURE_SC_VERW_IDLE); ++ if ( opt_md_clear_hvm && !(caps & ARCH_CAPS_SKIP_L1DFL) && !opt_l1d_flush ) ++ setup_force_cpu_cap(X86_FEATURE_SC_VERW_HVM); ++ ++ /* ++ * Warn the user if they are on MLPDS/MFBDS-vulnerable hardware with HT ++ * active and no explicit SMT choice. ++ */ ++ if ( opt_smt == -1 && cpu_has_bug_mds && hw_smt_enabled ) ++ warning_add( ++ "Booted on MLPDS/MFBDS-vulnerable hardware with SMT/Hyperthreading\n" ++ "enabled. Mitigations will not be fully effective. Please\n" ++ "choose an explicit smt= setting. See XSA-297.\n"); ++ + print_details(thunk, caps); + + /*
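Stepping back from the individual hunks, the SMT probe added in xsa297-4.12-3.patch is worth restating on its own, since both the L1TF nag and the new MDS warning above hinge on it: prefer MSR_INTEL_CORE_THREAD_COUNT on bare-metal Intel, otherwise scan APIC IDs for a non-zero thread-id component. A compressed sketch in which the parameters are hypothetical stand-ins for Xen's rdmsr_safe() and for_each_present_cpu() plumbing:

    #include <stdbool.h>
    #include <stdint.h>

    /* Sketch of check_smt_enabled(); all inputs stand in for state
     * that Xen gathers elsewhere during boot. */
    static bool sketch_smt_enabled(unsigned int num_siblings,
                                   const uint32_t *apic_ids,
                                   unsigned int nr_cpus,
                                   const uint64_t *ctc /* NULL if unreadable */)
    {
        unsigned int i;

        /* No SMT capability enumerated at all. */
        if ( num_siblings < 2 )
            return false;

        /* MSR_INTEL_CORE_THREAD_COUNT: SMT iff cores != threads. */
        if ( ctc )
            return ((*ctc >> 16) & 0xffff) != (*ctc & 0xffff);

        /* Fallback: any APIC ID with a non-zero thread-id component
         * proves a sibling thread is present. */
        for ( i = 0; i < nr_cpus; i++ )
            if ( apic_ids[i] & (num_siblings - 1) )
                return true;

        return false;
    }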