Index: sys/amd64/amd64/mp_machdep.c =================================================================== --- sys/amd64/amd64/mp_machdep.c +++ sys/amd64/amd64/mp_machdep.c @@ -1065,14 +1065,27 @@ { /* + * This attempts to follow the algorithm described in the + * Intel Multiprocessor Specification v1.4 in section B.4. + * For each IPI, we allow the local APIC ~20us to deliver the + * IPI. If a STARTUP IPI times out, we panic. + */ + + /* * first we do an INIT IPI: this INIT IPI might be run, resetting * and running the target CPU. OR this INIT IPI might be latched (P5 * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be * ignored. */ - lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | + lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_LEVEL | APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id); - lapic_ipi_wait(-1); + lapic_ipi_wait(20); + + /* Explicitly deassert the INIT IPI. */ + lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_LEVEL | + APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, + apic_id); + DELAY(10000); /* wait ~10mS */ /* @@ -1084,9 +1097,11 @@ * will run. */ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | - APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP | + APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP | vector, apic_id); - lapic_ipi_wait(-1); + if (!lapic_ipi_wait(20)) + panic("Failed to deliver first STARTUP IPI to APIC %d", + apic_id); DELAY(200); /* wait ~200uS */ /* @@ -1096,9 +1111,12 @@ * recognized after hardware RESET or INIT IPI. 
*/ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | - APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP | + APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP | vector, apic_id); - lapic_ipi_wait(-1); + if (!lapic_ipi_wait(20)) + panic("Failed to deliver second STARTUP IPI to APIC %d", + apic_id); + DELAY(200); /* wait ~200uS */ } Index: sys/i386/i386/mp_machdep.c =================================================================== --- sys/i386/i386/mp_machdep.c +++ sys/i386/i386/mp_machdep.c @@ -1138,14 +1138,27 @@ { /* + * This attempts to follow the algorithm described in the + * Intel Multiprocessor Specification v1.4 in section B.4. + * For each IPI, we allow the local APIC ~20us to deliver the + * IPI. If a STARTUP IPI times out, we panic. + */ + + /* * first we do an INIT IPI: this INIT IPI might be run, resetting * and running the target CPU. OR this INIT IPI might be latched (P5 * bug), CPU waiting for STARTUP IPI. OR this INIT IPI might be * ignored. */ - lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | + lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_LEVEL | APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, apic_id); - lapic_ipi_wait(-1); + lapic_ipi_wait(20); + + /* Explicitly deassert the INIT IPI. */ + lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_LEVEL | + APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_INIT, + apic_id); + DELAY(10000); /* wait ~10mS */ /* @@ -1157,9 +1170,11 @@ * will run. */ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | - APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP | + APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP | vector, apic_id); - lapic_ipi_wait(-1); + if (!lapic_ipi_wait(20)) + panic("Failed to deliver first STARTUP IPI to APIC %d", + apic_id); DELAY(200); /* wait ~200uS */ /* @@ -1169,9 +1184,12 @@ * recognized after hardware RESET or INIT IPI. 
*/ lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE | - APIC_LEVEL_DEASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP | + APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_STARTUP | vector, apic_id); - lapic_ipi_wait(-1); + if (!lapic_ipi_wait(20)) + panic("Failed to deliver second STARTUP IPI to APIC %d", + apic_id); + DELAY(200); /* wait ~200uS */ } Index: sys/x86/x86/local_apic.c =================================================================== --- sys/x86/x86/local_apic.c +++ sys/x86/x86/local_apic.c @@ -1452,22 +1452,22 @@ static int native_lapic_ipi_wait(int delay) { - int x, incr; + int x; /* - * Wait delay loops for IPI to be sent. This is highly bogus - * since this is sensitive to CPU clock speed. If delay is + * Wait delay microseconds for IPI to be sent. If delay is * -1, we wait forever. */ if (delay == -1) { - incr = 0; - delay = 1; - } else - incr = 1; - for (x = 0; x < delay; x += incr) { + while ((lapic->icr_lo & APIC_DELSTAT_MASK) != APIC_DELSTAT_IDLE) + ia32_pause(); + return (1); + } + + for (x = 0; x < delay; x += 5) { if ((lapic->icr_lo & APIC_DELSTAT_MASK) == APIC_DELSTAT_IDLE) return (1); - ia32_pause(); + DELAY(5); } return (0); } @@ -1501,9 +1501,9 @@ intr_restore(saveintr); } -#define BEFORE_SPIN 1000000 +#define BEFORE_SPIN 50000 #ifdef DETECT_DEADLOCK -#define AFTER_SPIN 1000 +#define AFTER_SPIN 50 #endif static void @@ -1514,7 +1514,7 @@ KASSERT((vector & ~APIC_VECTOR_MASK) == 0, ("%s: invalid vector %d", __func__, vector)); - icrlo = APIC_DESTMODE_PHY | APIC_TRIGMOD_EDGE; + icrlo = APIC_DESTMODE_PHY | APIC_TRIGMOD_EDGE | APIC_LEVEL_ASSERT; /* * IPI_STOP_HARD is just a "fake" vector used to send a NMI. @@ -1522,9 +1522,9 @@ * the vector. */ if (vector == IPI_STOP_HARD) - icrlo |= APIC_DELMODE_NMI | APIC_LEVEL_ASSERT; + icrlo |= APIC_DELMODE_NMI; else - icrlo |= vector | APIC_DELMODE_FIXED | APIC_LEVEL_DEASSERT; + icrlo |= vector | APIC_DELMODE_FIXED; destfield = 0; switch (dest) { case APIC_IPI_DEST_SELF: