Page MenuHomeFreeBSD

D23376.id67333.diff
No OneTemporary

D23376.id67333.diff

Index: sys/powerpc/include/cpufunc.h
===================================================================
--- sys/powerpc/include/cpufunc.h
+++ sys/powerpc/include/cpufunc.h
@@ -155,15 +155,8 @@
return (tb);
}
-static __inline void
-mttb(u_quad_t time)
-{
+// mttb is listed after intr_disable and intr_restore.
- mtspr(TBR_TBWL, 0);
- mtspr(TBR_TBWU, (uint32_t)(time >> 32));
- mtspr(TBR_TBWL, (uint32_t)(time & 0xffffffff));
-}
-
static __inline void
eieio(void)
{
@@ -200,6 +193,19 @@
{
mtmsr(msr);
+}
+
+static __inline void
+mttb(u_quad_t time) // Write a full 64-bit value into the timebase, with interrupts disabled.
+{
+ const uint32_t high= time>>32; // upper 32 bits of the new timebase value
+ const uint32_t low= time&0xffffffffu; // lower 32 bits of the new timebase value
+
+ const register_t predisable_msr= intr_disable(); // saved so intr_restore() can put the MSR back afterwards
+ mtspr(TBR_TBWL, 0); // presumably zeroed first so a low-word carry cannot bump the upper word mid-update -- TODO confirm
+ mtspr(TBR_TBWU, high);
+ mtspr(TBR_TBWL, low);
+ intr_restore(predisable_msr);
}
static __inline struct pcpu *
Index: sys/powerpc/powermac/platform_powermac.c
===================================================================
--- sys/powerpc/powermac/platform_powermac.c
+++ sys/powerpc/powermac/platform_powermac.c
@@ -333,6 +333,9 @@
return (powermac_smp_fill_cpuref(cpuref, bsp));
}
+// platform_powermac.c is implicitly an AIM context: no explicit AIM test.
+extern volatile int alternate_timebase_sync_style; // 0 indicates old style; 1 indicates new style
+
static int
powermac_smp_start_cpu(platform_t plat, struct pcpu *pc)
{
@@ -366,6 +369,13 @@
}
ap_pcpu = pc;
+
+ // platform_powermac.c is implicitly an AIM context: no explicit AIM test.
+ // Part of: Attempt a better-than-historical approximately
+ // equal timebase value for ap vs. bsp
+ alternate_timebase_sync_style= 1; // So: new style for PowerMacs
+
+ powerpc_sync(); // for ap_pcpu and alternate_timebase_sync_style
if (rstvec_virtbase == NULL)
rstvec_virtbase = pmap_mapdev(0x80000000, PAGE_SIZE);
Index: sys/powerpc/powerpc/machdep.c
===================================================================
--- sys/powerpc/powerpc/machdep.c
+++ sys/powerpc/powerpc/machdep.c
@@ -142,8 +142,6 @@
extern vm_paddr_t kernload;
#endif
-extern void *ap_pcpu;
-
struct pcpu __pcpu[MAXCPU];
static char init_kenv[2048];
@@ -283,8 +281,11 @@
cpu_feature_setup();
#ifdef AIM
+ // May restart the kernel at __start and get back here again.
aim_early_init(fdt, toc, ofentry, mdp, mdp_cookie);
#endif
+
+ mttb(0); // "The TB is a volatile resource and must be initialized during reset."
/*
* Parse metadata if present and fetch parameters. Must be done
Index: sys/powerpc/powerpc/mp_machdep.c
===================================================================
--- sys/powerpc/powerpc/mp_machdep.c
+++ sys/powerpc/powerpc/mp_machdep.c
@@ -69,6 +69,88 @@
static struct mtx ap_boot_mtx;
struct pcb stoppcbs[MAXCPU];
+#if defined(AIM)
+// Part of: Attempt a better-than-historical approximately
+// equal timebase value for ap vs. bsp
+
+volatile int alternate_timebase_sync_style= 0; // 0 indicates old style; 1 indicates new style.
+volatile uint64_t bsp_timebase_sample= 0u;
+
+volatile unsigned int from_bsp_status_flag= 0u;
+// stages: 0u, 1u (bsp ready to start), 2u (bsp tbr value available to ap)
+
+volatile unsigned int from_ap_status_flag= 0u;
+// stages: 0u, 1u (ap ready for bsp tbr value to be found and sent)
+#endif
+
+static __inline uint64_t
+mftb_with_no_pointer_use(void) // Return the current 64-bit timebase value, built in locals only.
+{
+#ifdef __powerpc64__
+ uint64_t tb; // single 64-bit mftb read; 32-bit powerpc takes the #else path
+ __asm __volatile ("mftb %0" : "=r"(tb));
+ return tb;
+#else
+ uint32_t tbu; // upper timebase word; a plain local, not a pointer into tb
+ uint32_t tbl; // lower timebase word; a plain local, not a pointer into tb
+
+ do {
+ tbu= mfspr(TBR_TBU);
+ tbl= mfspr(TBR_TBL);
+ } while (tbu != mfspr(TBR_TBU)); // retry when the upper word changed while the lower word was being read
+
+ // The construction of the uint64_t value does bias the mttb some
+ // for the round-trip-start side of things.
+ //
+ // The pointers into tb technique would involve a pair of memory
+ // writes and a pair of memory reads instead, the writes being
+ // in the loop.
+ return ((uint64_t)tbu<<32) | tbl;
+#endif
+}
+
+static __inline uint64_t
+mftb_plus_delta(volatile uint64_t* bsp_tbr, int64_t ap_midpoint)
+ // Returns the current timebase plus (*bsp_tbr - ap_midpoint); the caller passes the result to mttb.
+{
+#ifdef __powerpc64__
+ uint64_t tb; // single 64-bit mftb read; 32-bit powerpc takes the #else path
+ __asm __volatile ("mftb %0" : "=r"(tb));
+ // The construction of the uint64_t value does bias the mttb some:
+ // it assigns an earlier time than hoped for, given these later
+ // calculations.
+ return tb + ((int64_t)*bsp_tbr - ap_midpoint);
+#else
+ // Establishes delta_for_approx_match_to_bsp_tbr_values such that:
+ // ap_midpoint+delta_for_approx_match_to_bsp_tbr_values==*bsp_tbr
+ int64_t delta_for_approx_match_to_bsp_tbr_values;
+ uint32_t tbu; // upper timebase word; a plain local, not a pointer into tb
+ uint32_t tbl; // lower timebase word; a plain local, not a pointer into tb
+
+ do {
+ // The in-loop style below is for avoiding the loop
+ // vs. ap_midpoint's calculation being reversed in
+ // the generated code: the volatile read of *bsp_tbr
+ // is being put to use here.
+ delta_for_approx_match_to_bsp_tbr_values= (int64_t)*bsp_tbr-ap_midpoint;
+
+ tbu= mfspr(TBR_TBU);
+ tbl= mfspr(TBR_TBL);
+ } while (tbu != mfspr(TBR_TBU)); // retry when the upper word changed while the lower word was being read
+
+ // The construction of the uint64_t value does bias the mttb some:
+ // it assigns an earlier time than hoped for, given these later
+ // calculations. Easily observable on the example 7455 based PowerMac
+ // G4. (Faster than G5 tbr increment rate but a slower processor.)
+ // But the overall process is still an improvement.
+ //
+ // The pointers into tb technique would involve a pair of memory
+ // writes and a pair of memory reads instead, the writes being
+ // in the loop. The "+ . . ." would still be involved.
+ return ( ((uint64_t)tbu<<32) | tbl ) + delta_for_approx_match_to_bsp_tbr_values;
+#endif
+}
+
void
machdep_ap_bootstrap(void)
{
@@ -76,19 +158,76 @@
PCPU_SET(awake, 1);
__asm __volatile("msync; isync");
+#if defined(AIM)
+ powerpc_sync();
+ isync();
+ if (1==alternate_timebase_sync_style)
+ {
+ // Part of: Attempt a better-than-historical approximately
+ // equal timebase value for ap vs. bsp
+
+ // No claim to deal with overflow/wraparound of tbr, or even
+ // of the upper bit being on.
+
+ register_t oldmsr= intr_disable();
+
+ while (1u!=from_bsp_status_flag)
+ ; // spin waiting for bsp to flag that its ready to start.
+
+ // Start to measure a round trip: to the bsp and back.
+
+ isync(); // Be sure below mftb() result is not from earlier speculative execution.
+ uint64_t const start_round_trip_time_on_ap= mftb_with_no_pointer_use();
+ atomic_store_rel_int(&from_ap_status_flag, 1u); // bsp waits for such before its mftb().
+
+ while (2u!=from_bsp_status_flag)
+ ; // spin waiting for bsp's tbr value
+
+ // Mid-point of ap round trip and the bsp timebase value should be approximately equal
+ // when the tbr's are well matched, absent interruptions on both sides.
+
+ isync(); // Be sure below mftb() result is not from earlier speculative execution.
+ uint64_t const end_round_trip_time_on_ap= mftb_with_no_pointer_use();
+ isync(); // Be sure above mftb() result is not from overlapping with the following.
+
+ int64_t const approx_round_trip_tbr_delta_on_ap
+ = (int64_t)end_round_trip_time_on_ap - (int64_t)start_round_trip_time_on_ap;
+ int64_t const ap_midpoint_value
+ = (int64_t)start_round_trip_time_on_ap + approx_round_trip_tbr_delta_on_ap/2;
+
+ // The mftb_plus_delta use is for helping to control the code order relative
+ // to tbr access. Such issues are notable for the 7455 based 2-socket PowerMacs,
+ // for example. Faster tbr increment rate than the G5's but slower processors
+ // and such. Still, overall this definitely helps such contexts compared to the
+ // historical style of timebase synchronization.
+ isync(); // Be sure below mftb() result is not from earlier speculative execution.
+ mttb(mftb_plus_delta(&bsp_timebase_sample,ap_midpoint_value));
+
+ atomic_store_rel_int(&from_bsp_status_flag, 0u); // Get ready for next ap in bsp loop
+ atomic_store_rel_int(&from_ap_status_flag, 0u); // Flag bsp that this ap is done
+
+ mtmsr(oldmsr);
+ }
+#endif
+
while (ap_letgo == 0)
nop_prio_vlow();
nop_prio_medium();
- /*
- * Set timebase as soon as possible to meet an implicit rendezvous
- * from cpu_mp_unleash(), which sets ap_letgo and then immediately
- * sets timebase.
- *
- * Note that this is instrinsically racy and is only relevant on
- * platforms that do not support better mechanisms.
- */
- platform_smp_timebase_sync(ap_timebase, 1);
+#if defined(AIM)
+ if (0==alternate_timebase_sync_style)
+#endif
+ {
+ /*
+ * Set timebase as soon as possible to meet an implicit rendezvous
+ * from cpu_mp_unleash(), which sets ap_letgo and then immediately
+ * sets timebase.
+ *
+ * Note that this is intrinsically racy and is only relevant on
+ * platforms that do not support better mechanisms.
+ */
+ platform_smp_timebase_sync(ap_timebase, 1);
+ }
/* Give platform code a chance to do anything else necessary */
platform_smp_ap_init();
@@ -261,20 +400,56 @@
pc->pc_cpuid, (uintmax_t)pc->pc_hwref,
pc->pc_awake);
smp_cpus++;
+
+#if defined(AIM)
+ // Part of: Attempt a better-than-historical approximately
+ // equal timebase value for ap vs. bsp
+ powerpc_sync();
+ isync();
+ if (1==alternate_timebase_sync_style)
+ {
+ register_t oldmsr= intr_disable();
+
+ atomic_store_rel_int(&from_bsp_status_flag, 1u); // bsp ready to start.
+
+ while (1u!=from_ap_status_flag)
+ ; // spin waiting for ap to flag: time to send a tbr.
+
+ isync(); // Be sure below mftb() result is not from earlier.
+ bsp_timebase_sample= mftb_with_no_pointer_use();
+ atomic_store_rel_int(&from_bsp_status_flag, 2u); // bsp tbr available.
+
+ // Most of the rest of the usage is in machdep_ap_bootstrap,
+ // other than controlling alternate_timebase_sync_style value.
+
+ while (0u!=from_ap_status_flag)
+ ; // spin waiting for ap to be done with the sample.
+
+ mtmsr(oldmsr);
+ }
+#endif
} else
CPU_SET(pc->pc_cpuid, &stopped_cpus);
}
ap_awake = 1;
- /* Provide our current DEC and TB values for APs */
- ap_timebase = mftb() + 10;
- __asm __volatile("msync; isync");
+#if defined(AIM)
+ if (0==alternate_timebase_sync_style)
+#endif
+ {
+ /* Provide our current DEC and TB values for APs */
+ ap_timebase = mftb() + 10;
+ __asm __volatile("msync; isync");
+ }
/* Let APs continue */
atomic_store_rel_int(&ap_letgo, 1);
- platform_smp_timebase_sync(ap_timebase, 0);
+#if defined(AIM)
+ if (0==alternate_timebase_sync_style)
+#endif
+ platform_smp_timebase_sync(ap_timebase, 0);
while (ap_awake < smp_cpus)
;

File Metadata

Mime Type
text/plain
Expires
Tue, Apr 21, 2:11 PM (12 h, 6 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
31914840
Default Alt Text
D23376.id67333.diff (10 KB)

Event Timeline