/*
 * Summary of this change to sys/x86/x86/mca.c:
 *
 * - Drops the per-record "logged" flag from struct mca_internal and adds
 *   a separate mca_pending list.  Newly captured records are appended to
 *   mca_pending instead of mca_records; the CPU scan path and cmc_intr()
 *   drain mca_pending under mca_lock, pass each record to mca_log(), and
 *   then hand it to mca_store_record().
 * - Adds mca_maxcount (initial value -1), exposed as a new "maxcount"
 *   entry under hw.mca (CTLFLAG_RWTUN).  mca_store_record() treats a
 *   negative value as unlimited, 0 as "keep nothing" (the record goes
 *   straight back to mca_freelist), and a positive value as a cap: once
 *   mca_count has reached it, each newly stored record causes the oldest
 *   one to be moved from the head of mca_records to mca_freelist.
 * - sysctl_mca_maxcount() trims mca_records down to a newly lowered
 *   limit and, if anything was trimmed and the system is not cold,
 *   enqueues mca_refill_task.
 * - mca_fill_freelist() now first frees entries while mca_freecount
 *   exceeds 2 * imax(mp_ncpus, mca_banks), then tops the list up to
 *   imax(mp_ncpus, mca_banks) as before.  The scan path calls it
 *   directly after draining; cmc_intr() instead enqueues mca_refill_task
 *   when mca_freecount changed and the system is not cold.
 *
 * NOTE(review): the hunk at -122,9 +122,11 appears to add a blank line
 * ahead of the mca_freelist declaration in addition to mca_pending;
 * confirm that is intended.
 * NOTE(review): desired_high recomputes imax(mp_ncpus, mca_banks); it is
 * equal to desired_low times two.
 */
Index: sys/x86/x86/mca.c =================================================================== --- sys/x86/x86/mca.c +++ sys/x86/x86/mca.c @@ -86,7 +86,6 @@ struct mca_internal { struct mca_record rec; - int logged; STAILQ_ENTRY(mca_internal) link; }; @@ -101,6 +100,7 @@ static volatile int mca_count; /* Number of records stored. */ static int mca_banks; /* Number of per-CPU register banks. */ +static int mca_maxcount = -1; /* Limit on records stored. (-1 = unlimited) */ static SYSCTL_NODE(_hw, OID_AUTO, mca, CTLFLAG_RD, NULL, "Machine Check Architecture"); @@ -122,9 +122,11 @@ &workaround_erratum383, 0, "Is the workaround for Erratum 383 on AMD Family 10h processors enabled?"); + static STAILQ_HEAD(, mca_internal) mca_freelist; static int mca_freecount; static STAILQ_HEAD(, mca_internal) mca_records; +static STAILQ_HEAD(, mca_internal) mca_pending; static struct callout mca_timer; static int mca_ticks = 3600; /* Check hourly by default. */ static struct taskqueue *mca_tq; @@ -560,16 +562,29 @@ mca_fill_freelist(void) { struct mca_internal *rec; - int desired; + int desired_high, desired_low; /* * Ensure we have at least one record for each bank and one - * record per CPU. + * record per CPU. Also, free excess entries which have crept onto + * our list. Do the frees first in case a concurrent thread begins + * using the free list entries while this thread is still working. 
*/ - desired = imax(mp_ncpus, mca_banks); + desired_low = imax(mp_ncpus, mca_banks); + desired_high = imax(mp_ncpus, mca_banks) * 2; mtx_lock_spin(&mca_lock); - while (mca_freecount < desired) { + while (mca_freecount > desired_high) { + rec = STAILQ_FIRST(&mca_freelist); + KASSERT(rec != NULL, ("mca_freecount is %d, but list is empty", + mca_freecount)); + STAILQ_REMOVE_HEAD(&mca_freelist, link); + mca_freecount--; mtx_unlock_spin(&mca_lock); + free(rec, M_MCA); + mtx_lock_spin(&mca_lock); + } + while (mca_freecount < desired_low) { + mtx_unlock_spin(&mca_lock); rec = malloc(sizeof(*rec), M_MCA, M_WAITOK); mtx_lock_spin(&mca_lock); STAILQ_INSERT_TAIL(&mca_freelist, rec, link); @@ -607,9 +622,7 @@ } rec->rec = *record; - rec->logged = 0; - STAILQ_INSERT_TAIL(&mca_records, rec, link); - mca_count++; + STAILQ_INSERT_TAIL(&mca_pending, rec, link); mtx_unlock_spin(&mca_lock); if (mode == CMCI && !cold) taskqueue_enqueue(mca_tq, &mca_refill_task); @@ -796,6 +809,40 @@ } /* + * Store a new record on the mca_records list while enforcing + * mca_maxcount. + */ +static void +mca_store_record(struct mca_internal *mca) +{ + + /* + * If we are storing no records (mca_maxcount == 0), + * we just free this record. + * + * If we are storing records (mca_maxcount != 0) and + * we have free space on the list, store the record + * and increment mca_count. + * + * If we are storing records and we do not have free + * space on the list, store the new record at the + * tail and free the oldest one from the head. + */ + if (mca_maxcount != 0) + STAILQ_INSERT_TAIL(&mca_records, mca, link); + if (mca_maxcount < 0 || mca_count < mca_maxcount) + mca_count++; + else { + if (mca_maxcount != 0) { + mca = STAILQ_FIRST(&mca_records); + STAILQ_REMOVE_HEAD(&mca_records, link); + } + STAILQ_INSERT_TAIL(&mca_freelist, mca, link); + mca_freecount++; + } +} + +/* * Scan the machine check banks on all CPUs by binding to each CPU in * turn. 
If any of the CPUs contained new machine check records, log * them to the console. @@ -821,13 +868,13 @@ thread_unlock(td); if (count != 0) { mtx_lock_spin(&mca_lock); - STAILQ_FOREACH(mca, &mca_records, link) { - if (!mca->logged) { - mca->logged = 1; - mca_log(&mca->rec); - } + while ((mca = STAILQ_FIRST(&mca_pending)) != NULL) { + STAILQ_REMOVE_HEAD(&mca_pending, link); + mca_log(&mca->rec); + mca_store_record(mca); } mtx_unlock_spin(&mca_lock); + mca_fill_freelist(); } } @@ -853,6 +900,35 @@ return (0); } +static int +sysctl_mca_maxcount(SYSCTL_HANDLER_ARGS) +{ + struct mca_internal *mca; + int error, i; + bool dorefill; + + i = mca_maxcount; + error = sysctl_handle_int(oidp, &i, 0, req); + if (error || req->newptr == NULL) + return (error); + mtx_lock_spin(&mca_lock); + mca_maxcount = i; + dorefill = false; + if (mca_maxcount >= 0) + while (mca_count > mca_maxcount) { + mca = STAILQ_FIRST(&mca_records); + STAILQ_REMOVE_HEAD(&mca_records, link); + mca_count--; + STAILQ_INSERT_TAIL(&mca_freelist, mca, link); + mca_freecount++; + dorefill = true; + } + mtx_unlock_spin(&mca_lock); + if (dorefill && !cold) + taskqueue_enqueue(mca_tq, &mca_refill_task); + return (error); +} + static void mca_createtq(void *dummy) { @@ -933,6 +1009,7 @@ mca_banks = mcg_cap & MCG_CAP_COUNT; mtx_init(&mca_lock, "mca", NULL, MTX_SPIN); STAILQ_INIT(&mca_records); + STAILQ_INIT(&mca_pending); TASK_INIT(&mca_scan_task, 0, mca_scan_cpus, NULL); callout_init(&mca_timer, 1); STAILQ_INIT(&mca_freelist); @@ -942,6 +1019,10 @@ "count", CTLFLAG_RD, (int *)(uintptr_t)&mca_count, 0, "Record count"); SYSCTL_ADD_PROC(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO, + "maxcount", CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, + &mca_maxcount, 0, sysctl_mca_maxcount, "I", + "Maximum record count (-1 is unlimited)"); + SYSCTL_ADD_PROC(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO, "interval", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &mca_ticks, 0, sysctl_positive_int, "I", "Periodic interval in 
seconds to scan for machine checks"); @@ -1323,7 +1404,8 @@ cmc_intr(void) { struct mca_internal *mca; - int count; + int count, savedfreecnt; + bool dorefill; /* * Serialize MCA bank scanning to prevent collisions from @@ -1334,13 +1416,16 @@ /* If we found anything, log them to the console. */ if (count != 0) { mtx_lock_spin(&mca_lock); - STAILQ_FOREACH(mca, &mca_records, link) { - if (!mca->logged) { - mca->logged = 1; - mca_log(&mca->rec); - } + savedfreecnt = mca_freecount; + while ((mca = STAILQ_FIRST(&mca_pending)) != NULL) { + STAILQ_REMOVE_HEAD(&mca_pending, link); + mca_log(&mca->rec); + mca_store_record(mca); } + dorefill = mca_freecount != savedfreecnt; mtx_unlock_spin(&mca_lock); + if (dorefill && !cold) + taskqueue_enqueue(mca_tq, &mca_refill_task); } } #endif