Index: head/sys/x86/x86/mca.c =================================================================== --- head/sys/x86/x86/mca.c +++ head/sys/x86/x86/mca.c @@ -86,7 +86,6 @@ struct mca_internal { struct mca_record rec; - int logged; STAILQ_ENTRY(mca_internal) link; }; @@ -101,6 +100,7 @@ static volatile int mca_count; /* Number of records stored. */ static int mca_banks; /* Number of per-CPU register banks. */ +static int mca_maxcount = -1; /* Limit on records stored. (-1 = unlimited) */ static SYSCTL_NODE(_hw, OID_AUTO, mca, CTLFLAG_RD, NULL, "Machine Check Architecture"); @@ -125,10 +125,11 @@ static STAILQ_HEAD(, mca_internal) mca_freelist; static int mca_freecount; static STAILQ_HEAD(, mca_internal) mca_records; +static STAILQ_HEAD(, mca_internal) mca_pending; static struct callout mca_timer; static int mca_ticks = 3600; /* Check hourly by default. */ static struct taskqueue *mca_tq; -static struct task mca_refill_task, mca_scan_task; +static struct task mca_resize_task, mca_scan_task; static struct mtx mca_lock; static unsigned int @@ -557,32 +558,49 @@ } static void -mca_fill_freelist(void) +mca_resize_freelist(void) { - struct mca_internal *rec; - int desired; + struct mca_internal *next, *rec; + STAILQ_HEAD(, mca_internal) tmplist; + int count, i, desired_max, desired_min; /* * Ensure we have at least one record for each bank and one - * record per CPU. + * record per CPU, but no more than twice that amount. */ - desired = imax(mp_ncpus, mca_banks); + desired_min = imax(mp_ncpus, mca_banks); + desired_max = imax(mp_ncpus, mca_banks) * 2; + STAILQ_INIT(&tmplist); mtx_lock_spin(&mca_lock); - while (mca_freecount < desired) { + while (mca_freecount > desired_max) { + rec = STAILQ_FIRST(&mca_freelist); + KASSERT(rec != NULL, ("mca_freecount is %d, but list is empty", + mca_freecount)); + STAILQ_REMOVE_HEAD(&mca_freelist, link); + mca_freecount--; + STAILQ_INSERT_TAIL(&tmplist, rec, link); + } + while (mca_freecount < desired_min) { + count = desired_min - mca_freecount; mtx_unlock_spin(&mca_lock); - rec = malloc(sizeof(*rec), M_MCA, M_WAITOK); + for (i = 0; i < count; i++) { + rec = malloc(sizeof(*rec), M_MCA, M_WAITOK); + STAILQ_INSERT_TAIL(&tmplist, rec, link); + } mtx_lock_spin(&mca_lock); - STAILQ_INSERT_TAIL(&mca_freelist, rec, link); - mca_freecount++; + STAILQ_CONCAT(&mca_freelist, &tmplist); + mca_freecount += count; } mtx_unlock_spin(&mca_lock); + STAILQ_FOREACH_SAFE(rec, &tmplist, link, next) + free(rec, M_MCA); } static void -mca_refill(void *context, int pending) +mca_resize(void *context, int pending) { - mca_fill_freelist(); + mca_resize_freelist(); } static void @@ -607,12 +625,8 @@ } rec->rec = *record; - rec->logged = 0; - STAILQ_INSERT_TAIL(&mca_records, rec, link); - mca_count++; + STAILQ_INSERT_TAIL(&mca_pending, rec, link); mtx_unlock_spin(&mca_lock); - if (mode == CMCI && !cold) - taskqueue_enqueue(mca_tq, &mca_refill_task); } #ifdef DEV_APIC @@ -788,14 +802,69 @@ } #endif } - if (mode == POLLED) - mca_fill_freelist(); if (recoverablep != NULL) *recoverablep = recoverable; return (count); } /* + * Store a new record on the mca_records list while enforcing + * mca_maxcount. + */ +static void +mca_store_record(struct mca_internal *mca) +{ + + /* + * If we are storing no records (mca_maxcount == 0), + * we just free this record. + * + * If we are storing records (mca_maxcount != 0) and + * we have free space on the list, store the record + * and increment mca_count. + * + * If we are storing records and we do not have free + * space on the list, store the new record at the + * tail and free the oldest one from the head. + */ + if (mca_maxcount != 0) + STAILQ_INSERT_TAIL(&mca_records, mca, link); + if (mca_maxcount < 0 || mca_count < mca_maxcount) + mca_count++; + else { + if (mca_maxcount != 0) { + mca = STAILQ_FIRST(&mca_records); + STAILQ_REMOVE_HEAD(&mca_records, link); + } + STAILQ_INSERT_TAIL(&mca_freelist, mca, link); + mca_freecount++; + } +} + +/* + * Do the work to process machine check records which have just been + * gathered. Print any pending logs to the console. Queue them for storage. + * Trigger a resizing of the free list. + */ +static void +mca_process_records(enum scan_mode mode) +{ + struct mca_internal *mca; + + mtx_lock_spin(&mca_lock); + while ((mca = STAILQ_FIRST(&mca_pending)) != NULL) { + STAILQ_REMOVE_HEAD(&mca_pending, link); + mca_log(&mca->rec); + mca_store_record(mca); + } + mtx_unlock_spin(&mca_lock); + if (mode == POLLED) + mca_resize_freelist(); + else if (!cold) + taskqueue_enqueue(mca_tq, &mca_resize_task); +} + +/* * Scan the machine check banks on all CPUs by binding to each CPU in * turn. If any of the CPUs contained new machine check records, log * them to the console. @@ -803,11 +872,10 @@ static void mca_scan_cpus(void *context, int pending) { - struct mca_internal *mca; struct thread *td; int count, cpu; - mca_fill_freelist(); + mca_resize_freelist(); td = curthread; count = 0; thread_lock(td); @@ -819,16 +887,8 @@ sched_unbind(td); } thread_unlock(td); - if (count != 0) { - mtx_lock_spin(&mca_lock); - STAILQ_FOREACH(mca, &mca_records, link) { - if (!mca->logged) { - mca->logged = 1; - mca_log(&mca->rec); - } - } - mtx_unlock_spin(&mca_lock); - } + if (count != 0) + mca_process_records(POLLED); } static void @@ -853,6 +913,35 @@ return (0); } +static int +sysctl_mca_maxcount(SYSCTL_HANDLER_ARGS) +{ + struct mca_internal *mca; + int error, i; + bool doresize; + + i = mca_maxcount; + error = sysctl_handle_int(oidp, &i, 0, req); + if (error || req->newptr == NULL) + return (error); + mtx_lock_spin(&mca_lock); + mca_maxcount = i; + doresize = false; + if (mca_maxcount >= 0) + while (mca_count > mca_maxcount) { + mca = STAILQ_FIRST(&mca_records); + STAILQ_REMOVE_HEAD(&mca_records, link); + mca_count--; + STAILQ_INSERT_TAIL(&mca_freelist, mca, link); + mca_freecount++; + doresize = true; + } + mtx_unlock_spin(&mca_lock); + if (doresize && !cold) + taskqueue_enqueue(mca_tq, &mca_resize_task); + return (error); +} + static void mca_createtq(void *dummy) { @@ -864,7 +953,7 @@ taskqueue_start_threads(&mca_tq, 1, PI_SWI(SWI_TQ), "mca taskq"); /* CMCIs during boot may have claimed items from the freelist. */ - mca_fill_freelist(); + mca_resize_freelist(); } SYSINIT(mca_createtq, SI_SUB_CONFIGURE, SI_ORDER_ANY, mca_createtq, NULL); @@ -933,15 +1022,20 @@ mca_banks = mcg_cap & MCG_CAP_COUNT; mtx_init(&mca_lock, "mca", NULL, MTX_SPIN); STAILQ_INIT(&mca_records); + STAILQ_INIT(&mca_pending); TASK_INIT(&mca_scan_task, 0, mca_scan_cpus, NULL); callout_init(&mca_timer, 1); STAILQ_INIT(&mca_freelist); - TASK_INIT(&mca_refill_task, 0, mca_refill, NULL); - mca_fill_freelist(); + TASK_INIT(&mca_resize_task, 0, mca_resize, NULL); + mca_resize_freelist(); SYSCTL_ADD_INT(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO, "count", CTLFLAG_RD, (int *)(uintptr_t)&mca_count, 0, "Record count"); SYSCTL_ADD_PROC(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO, + "maxcount", CTLTYPE_INT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, + &mca_maxcount, 0, sysctl_mca_maxcount, "I", + "Maximum record count (-1 is unlimited)"); + SYSCTL_ADD_PROC(NULL, SYSCTL_STATIC_CHILDREN(_hw_mca), OID_AUTO, "interval", CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, &mca_ticks, 0, sysctl_positive_int, "I", "Periodic interval in seconds to scan for machine checks"); @@ -1322,25 +1416,14 @@ void cmc_intr(void) { - struct mca_internal *mca; - int count; /* * Serialize MCA bank scanning to prevent collisions from * sibling threads. + * + * If we found anything, log them to the console. */ - count = mca_scan(CMCI, NULL); - - /* If we found anything, log them to the console. */ - if (count != 0) { - mtx_lock_spin(&mca_lock); - STAILQ_FOREACH(mca, &mca_records, link) { - if (!mca->logged) { - mca->logged = 1; - mca_log(&mca->rec); - } - } - mtx_unlock_spin(&mca_lock); - } + if (mca_scan(CMCI, NULL) != 0) + mca_process_records(CMCI); } #endif