Index: sys/powerpc/include/cpufunc.h
===================================================================
--- sys/powerpc/include/cpufunc.h
+++ sys/powerpc/include/cpufunc.h
@@ -155,15 +155,8 @@
 	return (tb);
 }
 
-static __inline void
-mttb(u_quad_t time)
-{
-
-	mtspr(TBR_TBWL, 0);
-	mtspr(TBR_TBWU, (uint32_t)(time >> 32));
-	mtspr(TBR_TBWL, (uint32_t)(time & 0xffffffff));
-}
-
+// mttb() is now defined after intr_disable() and intr_restore(), which it uses.
+
 static __inline void
 eieio(void)
 {
@@ -200,6 +193,19 @@
 {
 	mtmsr(msr);
+}
+
+static __inline void
+mttb(u_quad_t time)
+{
+	const uint32_t high = time >> 32;
+	const uint32_t low = time & 0xffffffffu;
+
+	const register_t predisable_msr = intr_disable();
+	mtspr(TBR_TBWL, 0);
+	mtspr(TBR_TBWU, high);
+	mtspr(TBR_TBWL, low);
+	intr_restore(predisable_msr);
 }
 
 static __inline struct pcpu *
Index: sys/powerpc/powermac/platform_powermac.c
===================================================================
--- sys/powerpc/powermac/platform_powermac.c
+++ sys/powerpc/powermac/platform_powermac.c
@@ -333,6 +333,9 @@
 	return (powermac_smp_fill_cpuref(cpuref, bsp));
 }
 
+// platform_powermac.c is implicitly an AIM context: no explicit AIM test needed.
+extern volatile int alternate_timebase_sync_style; // 0 indicates old style; 1 indicates new style
+
static int
 powermac_smp_start_cpu(platform_t plat, struct pcpu *pc)
 {
@@ -366,6 +369,13 @@
 	}
 
 	ap_pcpu = pc;
+
+	// platform_powermac.c is implicitly an AIM context: no explicit AIM test needed.
+	// Part of the attempt at a better-than-historical approximate match
+	// of ap vs. bsp timebase values.
+	alternate_timebase_sync_style = 1; // So: new style for PowerMacs.
+
+	powerpc_sync(); // For the ap_pcpu and alternate_timebase_sync_style stores.
 
 	if (rstvec_virtbase == NULL)
 		rstvec_virtbase = pmap_mapdev(0x80000000, PAGE_SIZE);
Index: sys/powerpc/powerpc/machdep.c
===================================================================
--- sys/powerpc/powerpc/machdep.c
+++ sys/powerpc/powerpc/machdep.c
@@ -142,8 +142,6 @@
 extern vm_paddr_t kernload;
 #endif
 
-extern void *ap_pcpu;
-
 struct pcpu __pcpu[MAXCPU];
 
 static char init_kenv[2048];
@@ -283,8 +281,11 @@
 	cpu_feature_setup();
 
 #ifdef AIM
+	// aim_early_init() may restart the kernel at __start and get back here again.
 	aim_early_init(fdt, toc, ofentry, mdp, mdp_cookie);
 #endif
+
+	mttb(0); // "The TB is a volatile resource and must be initialized during reset."
 
 	/*
 	 * Parse metadata if present and fetch parameters. Must be done
Index: sys/powerpc/powerpc/mp_machdep.c
===================================================================
--- sys/powerpc/powerpc/mp_machdep.c
+++ sys/powerpc/powerpc/mp_machdep.c
@@ -69,6 +69,88 @@
 static struct mtx ap_boot_mtx;
 
 struct pcb stoppcbs[MAXCPU];
 
+#if defined(AIM)
+// Part of the attempt at a better-than-historical approximate match
+// of ap vs. bsp timebase values.
+
+volatile int alternate_timebase_sync_style = 0; // 0 indicates old style; 1 indicates new style.
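+
+// Handshake overview (the bsp side is in cpu_mp_unleash(), one ap at a time):
+//   bsp: from_bsp_status_flag 0u -> 1u (bsp ready to start)
+//   ap:  from_ap_status_flag  0u -> 1u (ap's round-trip mftb taken)
+//   bsp: bsp_timebase_sample = mftb; from_bsp_status_flag 1u -> 2u
+//   ap:  finishes the round trip, rewrites its tbr, zeros both flags
+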
+volatile uint64_t bsp_timebase_sample = 0u;
+
+volatile unsigned int from_bsp_status_flag = 0u;
+// stages: 0u, 1u (bsp ready to start), 2u (bsp tbr value available to ap)
+
+volatile unsigned int from_ap_status_flag = 0u;
+// stages: 0u, 1u (ap ready for bsp tbr value to be found and sent)
+#endif
+
+static __inline uint64_t
+mftb_with_no_pointer_use(void)
+{
+#ifdef __powerpc64__
+	uint64_t tb; // 64-bit: a single mftb reads the whole timebase
+	__asm __volatile ("mftb %0" : "=r"(tb));
+	return tb;
+#else
+	uint32_t tbu; // a separate local, not a pointer into a uint64_t tb
+	uint32_t tbl; // a separate local, not a pointer into a uint64_t tb
+
+	do {
+		tbu = mfspr(TBR_TBU);
+		tbl = mfspr(TBR_TBL);
+	} while (tbu != mfspr(TBR_TBU));
+
+	// The construction of the uint64_t value does bias the mttb some
+	// for the round-trip-start side of things.
+	//
+	// The pointers-into-tb technique would instead involve a pair of
+	// memory writes and a pair of memory reads, the writes being
+	// in the loop.
+	return ((uint64_t)tbu << 32) | tbl;
+#endif
+}
+
+static __inline uint64_t
+mftb_plus_delta(volatile uint64_t *bsp_tbr, int64_t ap_midpoint)
+	// The return value is used as the argument to mttb().
+{
+#ifdef __powerpc64__
+	uint64_t tb;
+	__asm __volatile ("mftb %0" : "=r"(tb));
+	// The addition does bias the mttb some: it assigns an earlier
+	// time than hoped for, given these later calculations.
+	return tb + ((int64_t)*bsp_tbr - ap_midpoint);
+#else
+	// Establishes delta_for_approx_match_to_bsp_tbr_values such that:
+	// ap_midpoint + delta_for_approx_match_to_bsp_tbr_values == *bsp_tbr
+	int64_t delta_for_approx_match_to_bsp_tbr_values;
+	uint32_t tbu; // a separate local, not a pointer into a uint64_t tb
+	uint32_t tbl; // a separate local, not a pointer into a uint64_t tb
+
+	do {
+		// Computing the delta inside the loop keeps the generated
+		// code from reordering this calculation relative to the
+		// loop: volatile is being put to use here.
+		delta_for_approx_match_to_bsp_tbr_values = (int64_t)*bsp_tbr - ap_midpoint;
+
+		tbu = mfspr(TBR_TBU);
+		tbl = mfspr(TBR_TBL);
+	} while (tbu != mfspr(TBR_TBU));
+
+	// The construction of the uint64_t value does bias the mttb some:
+	// it assigns an earlier time than hoped for, given these later
+	// calculations. This is easily observable on the example 7455-based
+	// PowerMac G4 (a faster tbr increment rate than the G5's, but a
+	// slower processor). But the overall process is still an improvement.
+	//
+	// The pointers-into-tb technique would instead involve a pair of
+	// memory writes and a pair of memory reads, the writes being
+	// in the loop. The "+ delta" would still be involved.
+	return (((uint64_t)tbu << 32) | tbl) + delta_for_approx_match_to_bsp_tbr_values;
+#endif
+}
+
 void
 machdep_ap_bootstrap(void)
 {
@@ -76,19 +158,76 @@
 	PCPU_SET(awake, 1);
 	__asm __volatile("msync; isync");
 
+#if defined(AIM)
+	powerpc_sync();
+	isync();
+	if (alternate_timebase_sync_style == 1) {
+		// Part of the attempt at a better-than-historical approximate
+		// match of ap vs. bsp timebase values.
+
+		// No claim to deal with overflow/wraparound of the tbr, or
+		// even with the upper bit being on.
+
+		register_t oldmsr = intr_disable();
+
+		while (from_bsp_status_flag != 1u)
+			; // Spin waiting for the bsp to flag that it is ready to start.
+
+		// Start to measure a round trip: to the bsp and back.
+
+		isync(); // Ensure the mftb() below is not from earlier speculative execution.
+		uint64_t const start_round_trip_time_on_ap = mftb_with_no_pointer_use();
+		atomic_store_rel_int(&from_ap_status_flag, 1u); // The bsp waits for this before its mftb().
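+
+		// Worked example with made-up numbers: if start above is 1000
+		// and end below turns out to be 1200, the midpoint is 1100.
+		// If the bsp sampled 5000 at about that midpoint, the delta is
+		// 5000 - 1100 == 3900 and this ap's tbr gets rewritten to
+		// roughly its-own-current-tbr + 3900.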
+
+		while (from_bsp_status_flag != 2u)
+			; // Spin waiting for the bsp's tbr value.
+
+		// The midpoint of the ap's round trip and the bsp timebase
+		// value should be approximately equal when the tbrs end up
+		// well matched, absent interruptions on either side.
+
+		isync(); // Ensure the mftb() below is not from earlier speculative execution.
+		uint64_t const end_round_trip_time_on_ap = mftb_with_no_pointer_use();
+		isync(); // Ensure the mftb() above does not overlap with the following.
+
+		int64_t const approx_round_trip_tbr_delta_on_ap
+		    = (int64_t)end_round_trip_time_on_ap - (int64_t)start_round_trip_time_on_ap;
+		int64_t const ap_midpoint_value
+		    = (int64_t)start_round_trip_time_on_ap + approx_round_trip_tbr_delta_on_ap / 2;
+
+		// Using mftb_plus_delta() helps control the code order relative
+		// to the tbr access. Such issues are notable on, for example,
+		// the 7455-based 2-socket PowerMacs: a faster tbr increment
+		// rate than the G5's, but slower processors. Still, overall
+		// this definitely helps such contexts compared to the
+		// historical style of timebase synchronization.
+		isync(); // Ensure the mftb() below is not from earlier speculative execution.
+		mttb(mftb_plus_delta(&bsp_timebase_sample, ap_midpoint_value));
+
+		atomic_store_rel_int(&from_bsp_status_flag, 0u); // Get ready for the next ap in the bsp loop.
+		atomic_store_rel_int(&from_ap_status_flag, 0u); // Flag the bsp that this ap is done.
+
+		mtmsr(oldmsr);
+	}
+#endif
+
 	while (ap_letgo == 0)
 		nop_prio_vlow();
 	nop_prio_medium();
 
-	/*
-	 * Set timebase as soon as possible to meet an implicit rendezvous
-	 * from cpu_mp_unleash(), which sets ap_letgo and then immediately
-	 * sets timebase.
-	 *
-	 * Note that this is instrinsically racy and is only relevant on
-	 * platforms that do not support better mechanisms.
-	 */
-	platform_smp_timebase_sync(ap_timebase, 1);
+#if defined(AIM)
+	if (alternate_timebase_sync_style == 0)
+#endif
+	{
+		/*
+		 * Set timebase as soon as possible to meet an implicit rendezvous
+		 * from cpu_mp_unleash(), which sets ap_letgo and then immediately
+		 * sets timebase.
+		 *
+		 * Note that this is intrinsically racy and is only relevant on
+		 * platforms that do not support better mechanisms.
+		 */
+		platform_smp_timebase_sync(ap_timebase, 1);
+	}
 
 	/* Give platform code a chance to do anything else necessary */
 	platform_smp_ap_init();
@@ -261,20 +400,56 @@
 		    pc->pc_cpuid, (uintmax_t)pc->pc_hwref, pc->pc_awake);
 		smp_cpus++;
+
+#if defined(AIM)
+		// Part of the attempt at a better-than-historical approximate
+		// match of ap vs. bsp timebase values.
+		powerpc_sync();
+		isync();
+		if (alternate_timebase_sync_style == 1) {
+			register_t oldmsr = intr_disable();
+
+			atomic_store_rel_int(&from_bsp_status_flag, 1u); // The bsp is ready to start.
+
+			while (from_ap_status_flag != 1u)
+				; // Spin waiting for the ap to flag: time to send a tbr value.
+
+			isync(); // Ensure the mftb() below is not from earlier speculative execution.
+			bsp_timebase_sample = mftb_with_no_pointer_use();
+			atomic_store_rel_int(&from_bsp_status_flag, 2u); // The bsp's tbr value is available.
+
+			// Most of the rest of the usage is in machdep_ap_bootstrap(),
+			// other than controlling the alternate_timebase_sync_style value.
+
+			while (from_ap_status_flag != 0u)
+				; // Spin waiting for the ap to be done with the sample.
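+
+			// The ap zeros both flags when it is done, so waiting
+			// here serializes the handshake: the shared variables
+			// are idle again before the next ap's pass begins.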
+
+			mtmsr(oldmsr);
+		}
+#endif
 		} else
 			CPU_SET(pc->pc_cpuid, &stopped_cpus);
 	}
 
 	ap_awake = 1;
 
-	/* Provide our current DEC and TB values for APs */
-	ap_timebase = mftb() + 10;
-	__asm __volatile("msync; isync");
+#if defined(AIM)
+	if (alternate_timebase_sync_style == 0)
+#endif
+	{
+		/* Provide our current DEC and TB values for APs */
+		ap_timebase = mftb() + 10;
+		__asm __volatile("msync; isync");
+	}
 
 	/* Let APs continue */
 	atomic_store_rel_int(&ap_letgo, 1);
 
-	platform_smp_timebase_sync(ap_timebase, 0);
+#if defined(AIM)
+	if (alternate_timebase_sync_style == 0)
+#endif
+		platform_smp_timebase_sync(ap_timebase, 0);
 
 	while (ap_awake < smp_cpus)
 		;
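
For reference, a minimal user-space sketch of the midpoint arithmetic the mp_machdep.c changes perform. This is illustrative only and not part of the patch: the sim_* names and the sample tbr values are made up, and there is no SPR access. It just demonstrates that the computed mttb() argument lands the ap's tbr approximately on the bsp's.

#include <stdint.h>
#include <stdio.h>

// Made-up stand-ins for tbr reads (no mfspr/mftb involved).
static const uint64_t sim_start_round_trip_on_ap = 1000u; // ap's mftb at round-trip start
static const uint64_t sim_bsp_timebase_sample    = 5000u; // bsp's mftb during the round trip
static const uint64_t sim_end_round_trip_on_ap   = 1200u; // ap's mftb at round-trip end
static const uint64_t sim_ap_tbr_at_mttb_time    = 1250u; // ap's mftb just before the mttb

int
main(void)
{
	// Same arithmetic as machdep_ap_bootstrap()'s alternate style:
	int64_t round_trip = (int64_t)sim_end_round_trip_on_ap
	    - (int64_t)sim_start_round_trip_on_ap;
	int64_t ap_midpoint = (int64_t)sim_start_round_trip_on_ap + round_trip / 2;

	// mftb_plus_delta(): delta such that ap_midpoint + delta == *bsp_tbr.
	int64_t delta = (int64_t)sim_bsp_timebase_sample - ap_midpoint;
	uint64_t mttb_argument = sim_ap_tbr_at_mttb_time + delta;

	printf("ap midpoint:   %jd\n", (intmax_t)ap_midpoint);
	printf("delta:         %jd\n", (intmax_t)delta);
	printf("mttb argument: %ju\n", (uintmax_t)mttb_argument);
	// With these numbers: midpoint 1100, delta 3900, mttb argument 5150,
	// i.e., the ap's tbr jumps to approximately the bsp's tbr at that
	// moment (5000 advanced by the 150 ticks since the midpoint).
	return (0);
}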