Index: sys/x86/include/specialreg.h =================================================================== --- sys/x86/include/specialreg.h +++ sys/x86/include/specialreg.h @@ -704,6 +704,22 @@ #define MC_MISC_ADDRESS_MODE 0x00000000000001c0 /* If MCG_CAP_SER_P */ #define MC_CTL2_THRESHOLD 0x0000000000007fff #define MC_CTL2_CMCI_EN 0x0000000040000000 +#define MC_AMDNB_BANK 4 +#define MC_MISC_AMDNB_VAL 0x8000000000000000 /* Counter presence valid */ +#define MC_MISC_AMDNB_CNTP 0x4000000000000000 /* Counter present */ +#define MC_MISC_AMDNB_LOCK 0x2000000000000000 /* Register locked */ +#define MC_MISC_AMDNB_LVT_MASK 0x00f0000000000000 /* Extended LVT offset */ +#define MC_MISC_AMDNB_LVT_SHIFT 52 +#define MC_MISC_AMDNB_CNTEN 0x0008000000000000 /* Counter enabled */ +#define MC_MISC_AMDNB_INT_MASK 0x0006000000000000 /* Interrupt type */ +#define MC_MISC_AMDNB_INT_LVT 0x0002000000000000 /* Interrupt via Extended LVT */ +#define MC_MISC_AMDNB_INT_SMI 0x0004000000000000 /* SMI */ +#define MC_MISC_AMDNB_OVERFLW 0x0001000000000000 /* Counter overflow */ +#define MC_MISC_AMDNB_CNT_MASK 0x00000fff00000000 /* Counter value */ +#define MC_MISC_AMDNB_CNT_SHIFT 32 +#define MC_MISC_AMDNB_CNT_MAX 0xfff +#define MC_MISC_AMDNB_PTR_MASK 0x00000000ff000000 /* Pointer to additional registers */ +#define MC_MISC_AMDNB_PTR_SHIFT 24 /* * The following four 3-byte registers control the non-cacheable regions. Index: sys/x86/x86/mca.c =================================================================== --- sys/x86/x86/mca.c +++ sys/x86/x86/mca.c @@ -73,7 +73,12 @@ */ struct cmc_state { int max_threshold; - int last_intr; + time_t last_intr; +}; + +struct amd_et_state { + int cur_threshold; + time_t last_intr; }; #endif @@ -118,7 +123,8 @@ static struct mtx mca_lock; #ifdef DEV_APIC -static struct cmc_state **cmc_state; /* Indexed by cpuid, bank */ +static struct cmc_state **cmc_state; /* Indexed by cpuid, bank. */ +static struct amd_et_state *amd_et_state; /* Indexed by cpuid. */ static int cmc_throttle = 60; /* Time in seconds to throttle CMCI. */ #endif @@ -521,19 +527,15 @@ * cmc_throttle seconds or the periodic scan. If a periodic scan * finds that the threshold is too high, it is lowered. */ -static void -cmci_update(enum scan_mode mode, int bank, int valid, struct mca_record *rec) +static int +update_threshold(enum scan_mode mode, int valid, int last_intr, int count, + int cur_threshold, int max_threshold) { - struct cmc_state *cc; - uint64_t ctl; u_int delta; - int count, limit; + int limit; - /* Fetch the current limit for this bank. */ - cc = &cmc_state[PCPU_GET(cpuid)][bank]; - ctl = rdmsr(MSR_MC_CTL2(bank)); - count = (rec->mr_status & MC_STATUS_COR_COUNT) >> 38; - delta = (u_int)(time_uptime - cc->last_intr); + delta = (u_int)(time_uptime - last_intr); + limit = cur_threshold; /* * If an interrupt was received less than cmc_throttle seconds @@ -542,16 +544,11 @@ * double the threshold up to the max. */ if (mode == CMCI && valid) { - limit = ctl & MC_CTL2_THRESHOLD; if (delta < cmc_throttle && count >= limit && - limit < cc->max_threshold) { - limit = min(limit << 1, cc->max_threshold); - ctl &= ~MC_CTL2_THRESHOLD; - ctl |= limit; - wrmsr(MSR_MC_CTL2(bank), ctl); + limit < max_threshold) { + limit = min(limit << 1, max_threshold); } - cc->last_intr = time_uptime; - return; + return (limit); } /* @@ -559,31 +556,81 @@ * should be lowered. */ if (mode != POLLED) - return; + return (limit); /* If a CMCI occured recently, do nothing for now. */ if (delta < cmc_throttle) - return; + return (limit); /* * Compute a new limit based on the average rate of events per * cmc_throttle seconds since the last interrupt. */ if (valid) { - count = (rec->mr_status & MC_STATUS_COR_COUNT) >> 38; limit = count * cmc_throttle / delta; if (limit <= 0) limit = 1; - else if (limit > cc->max_threshold) - limit = cc->max_threshold; - } else + else if (limit > max_threshold) + limit = max_threshold; + } else { limit = 1; - if ((ctl & MC_CTL2_THRESHOLD) != limit) { + } + return (limit); +} + +static void +cmci_update(enum scan_mode mode, int bank, int valid, struct mca_record *rec) +{ + struct cmc_state *cc; + uint64_t ctl; + int cur_threshold, new_threshold; + int count; + + /* Fetch the current limit for this bank. */ + cc = &cmc_state[PCPU_GET(cpuid)][bank]; + ctl = rdmsr(MSR_MC_CTL2(bank)); + count = (rec->mr_status & MC_STATUS_COR_COUNT) >> 38; + cur_threshold = ctl & MC_CTL2_THRESHOLD; + + new_threshold = update_threshold(mode, valid, cc->last_intr, count, + cur_threshold, cc->max_threshold); + + if (mode == CMCI && valid) + cc->last_intr = time_uptime; + if (new_threshold != cur_threshold) { ctl &= ~MC_CTL2_THRESHOLD; - ctl |= limit; + ctl |= new_threshold; wrmsr(MSR_MC_CTL2(bank), ctl); } } + +static void +amd_thresholding_update(enum scan_mode mode, int bank, int valid) +{ + struct amd_et_state *cc; + uint64_t misc; + int new_threshold; + int count; + + KASSERT(bank == MC_AMDNB_BANK, + ("%s: unexpected bank %d", __func__, bank)); + cc = &amd_et_state[PCPU_GET(cpuid)]; + misc = rdmsr(MSR_MC_MISC(bank)); + count = (misc & MC_MISC_AMDNB_CNT_MASK) >> MC_MISC_AMDNB_CNT_SHIFT; + count = count - (MC_MISC_AMDNB_CNT_MAX - cc->cur_threshold); + + new_threshold = update_threshold(mode, valid, cc->last_intr, count, + cc->cur_threshold, MC_MISC_AMDNB_CNT_MAX); + + cc->cur_threshold = new_threshold; + misc &= ~MC_MISC_AMDNB_CNT_MASK; + misc |= (uint64_t)(MC_MISC_AMDNB_CNT_MAX - cc->cur_threshold) + << MC_MISC_AMDNB_CNT_SHIFT; + misc &= ~MC_MISC_AMDNB_OVERFLW; + wrmsr(MSR_MC_MISC(bank), misc); + if (mode == CMCI && valid) + cc->last_intr = time_uptime; +} #endif /* @@ -638,8 +685,12 @@ * If this is a bank this CPU monitors via CMCI, * update the threshold. */ - if (PCPU_GET(cmci_mask) & 1 << i) - cmci_update(mode, i, valid, &rec); + if (PCPU_GET(cmci_mask) & 1 << i) { + if (cmc_state != NULL) + cmci_update(mode, i, valid, &rec); + else + amd_thresholding_update(mode, i, valid); + } #endif } if (mode == POLLED) @@ -751,6 +802,18 @@ &cmc_throttle, 0, sysctl_positive_int, "I", "Interval in seconds to throttle corrected MC interrupts"); } + +static void +amd_thresholding_setup(void) +{ + + amd_et_state = malloc((mp_maxid + 1) * sizeof(struct amd_et_state *), + M_MCA, M_WAITOK | M_ZERO); + SYSCTL_ADD_PROC(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO, + "cmc_throttle", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, + &cmc_throttle, 0, sysctl_positive_int, "I", + "Interval in seconds to throttle corrected MC interrupts"); +} #endif static void @@ -789,6 +852,9 @@ #ifdef DEV_APIC if (mcg_cap & MCG_CAP_CMCI_P) cmci_setup(); + else if (cpu_vendor_id == CPU_VENDOR_AMD && + CPUID_TO_FAMILY(cpu_id) >= 0x10 && CPUID_TO_FAMILY(cpu_id) <= 16) + amd_thresholding_setup(); #endif } @@ -863,6 +929,101 @@ ctl |= MC_CTL2_CMCI_EN | 1; wrmsr(MSR_MC_CTL2(i), ctl); } + +static void +amd_thresholding_init() +{ + struct amd_et_state *cc; + uint64_t misc; + int elvt; + + /* The counter must be valid and present. */ + misc = rdmsr(MSR_MC_MISC(MC_AMDNB_BANK)); + if ((misc & (MC_MISC_AMDNB_VAL | MC_MISC_AMDNB_CNTP)) != + (MC_MISC_AMDNB_VAL | MC_MISC_AMDNB_CNTP)) + return; + + /* The register should not be locked. */ + if ((misc & MC_MISC_AMDNB_LOCK) != 0) + return; + + /* + * If counter is enabled then either the firmware or another CPU + * has already claimed it. + */ + if ((misc & MC_MISC_AMDNB_CNTEN) != 0) + return; + + /* + * Configure an Extended Interrupt LVT register for reporting + * counter overflows if that feature is supported and the first + * extended register is available. + */ + elvt = lapic_enable_mca_elvt(); + if (elvt < 0) + return; + + /* Re-use Intel CMC support infrastructure. */ + cc = &amd_et_state[PCPU_GET(cpuid)]; + + /* Construct a new value with the starting threshold of one. */ + cc->cur_threshold = 1; + misc &= ~MC_MISC_AMDNB_INT_MASK; + misc |= MC_MISC_AMDNB_INT_LVT; + misc &= ~MC_MISC_AMDNB_LVT_MASK; + misc |= (uint64_t)elvt << MC_MISC_AMDNB_LVT_SHIFT; + misc &= ~MC_MISC_AMDNB_CNT_MASK; + misc |= (uint64_t)(MC_MISC_AMDNB_CNT_MAX - cc->cur_threshold) + << MC_MISC_AMDNB_CNT_SHIFT; + misc &= ~MC_MISC_AMDNB_OVERFLW; + misc |= MC_MISC_AMDNB_CNTEN; + + wrmsr(MSR_MC_MISC(MC_AMDNB_BANK), misc); + + /* Mark the NB bank as monitored. */ + PCPU_SET(cmci_mask, PCPU_GET(cmci_mask) | 1 << MC_AMDNB_BANK); +} + +static void +amd_thresholding_resume() +{ + struct amd_et_state *cc; + uint64_t misc; + int elvt; + + /* Nothing to do if this CPU doesn't monitor the NB bank. */ + if ((PCPU_GET(cmci_mask) & 1 << MC_AMDNB_BANK) == 0) + return; + + cc = &amd_et_state[PCPU_GET(cpuid)]; + cc->last_intr = 0; + misc = rdmsr(MSR_MC_MISC(MC_AMDNB_BANK)); + + /* + * XXX Should we expect the ELVT to be configured? + * Should we reconfigure it? + * Should that be LAPIC's worry? + */ + elvt = lapic_enable_mca_elvt(); + if (elvt < 0) { + printf("MCA resume: can't setup AMD ELVT\n"); + return; + } + + /* Construct a new value with the starting threshold of one. */ + cc->cur_threshold = 1; + misc &= ~MC_MISC_AMDNB_INT_MASK; + misc |= MC_MISC_AMDNB_INT_LVT; + misc &= MC_MISC_AMDNB_LVT_MASK; + misc |= (uint64_t)elvt << MC_MISC_AMDNB_LVT_SHIFT; + misc &= ~MC_MISC_AMDNB_CNT_MASK; + misc |= (uint64_t)(MC_MISC_AMDNB_CNT_MAX - cc->cur_threshold) + << MC_MISC_AMDNB_CNT_SHIFT; + misc &= ~MC_MISC_AMDNB_OVERFLW; + misc |= MC_MISC_AMDNB_CNTEN; + + wrmsr(MSR_MC_MISC(MC_AMDNB_BANK), misc); +} #endif /* @@ -940,8 +1101,28 @@ } #ifdef DEV_APIC - if (PCPU_GET(cmci_mask) != 0 && boot) + /* + * AMD Processors from families 10h - 16h provide support + * for Machine Check Error Thresholding. + * The processors support counters of MC errors and they + * can be configured to generate an interrupt when a counter + * overflows. + * The counters are all associated with Bank 4 and each + * of them covers a group of errors reported via that bank. + * At the moment only the DRAM Error Threshold Group is + * supported. + */ + if (cpu_vendor_id == CPU_VENDOR_AMD && + CPUID_TO_FAMILY(cpu_id) >= 0x10 && + CPUID_TO_FAMILY(cpu_id) <= 0x16 && + (mcg_cap & MCG_CAP_COUNT) >= 4) { + if (boot) + amd_thresholding_init(); + else + amd_thresholding_resume(); + } else if (PCPU_GET(cmci_mask) != 0 && boot) { lapic_enable_cmc(); + } #endif }