Page Menu
Home
FreeBSD
Search
Configure Global Search
Log In
Files
F153339487
D23376.id67333.diff
No One
Temporary
Actions
View File
Edit File
Delete File
View Transforms
Subscribe
Mute Notifications
Flag For Later
Award Token
Size
10 KB
Referenced Files
None
Subscribers
None
D23376.id67333.diff
View Options
Index: sys/powerpc/include/cpufunc.h
===================================================================
--- sys/powerpc/include/cpufunc.h
+++ sys/powerpc/include/cpufunc.h
@@ -155,15 +155,8 @@
return (tb);
}
-static __inline void
-mttb(u_quad_t time)
-{
+// mttb is listed after intr_disable and intr_restore.
- mtspr(TBR_TBWL, 0);
- mtspr(TBR_TBWU, (uint32_t)(time >> 32));
- mtspr(TBR_TBWL, (uint32_t)(time & 0xffffffff));
-}
-
static __inline void
eieio(void)
{
@@ -200,6 +193,19 @@
{
mtmsr(msr);
+}
+
+static __inline void
+mttb(u_quad_t time)
+{
+ const uint32_t high= time>>32;
+ const uint32_t low= time&0xffffffffu;
+
+ const register_t predisable_msr= intr_disable();
+ mtspr(TBR_TBWL, 0);
+ mtspr(TBR_TBWU, high);
+ mtspr(TBR_TBWL, low);
+ intr_restore(predisable_msr);
}
static __inline struct pcpu *
Index: sys/powerpc/powermac/platform_powermac.c
===================================================================
--- sys/powerpc/powermac/platform_powermac.c
+++ sys/powerpc/powermac/platform_powermac.c
@@ -333,6 +333,9 @@
return (powermac_smp_fill_cpuref(cpuref, bsp));
}
+// platform_powermac.c is implicitly an AIM context: no explicit AIM test.
+extern volatile int alternate_timebase_sync_style; // 0 indicates old style; 1 indicates new style
+
static int
powermac_smp_start_cpu(platform_t plat, struct pcpu *pc)
{
@@ -366,6 +369,13 @@
}
ap_pcpu = pc;
+
+ // platform_powermac.c is implicitly an AIM context: no explicit AIM test.
+ // Part of: Attempt a better-than-historical approximately
+ // equal timebase value for ap vs. bsp
+ alternate_timebase_sync_style= 1; // So: new style for PowerMacs
+
+ powerpc_sync(); // for ap_pcpu and alternate_timebase_sync_style
if (rstvec_virtbase == NULL)
rstvec_virtbase = pmap_mapdev(0x80000000, PAGE_SIZE);
Index: sys/powerpc/powerpc/machdep.c
===================================================================
--- sys/powerpc/powerpc/machdep.c
+++ sys/powerpc/powerpc/machdep.c
@@ -142,8 +142,6 @@
extern vm_paddr_t kernload;
#endif
-extern void *ap_pcpu;
-
struct pcpu __pcpu[MAXCPU];
static char init_kenv[2048];
@@ -283,8 +281,11 @@
cpu_feature_setup();
#ifdef AIM
+ // May restart the kernel at __start and get back here again.
aim_early_init(fdt, toc, ofentry, mdp, mdp_cookie);
#endif
+
+ mttb(0); // "The TB is a volatile resource and must be initialized during reset."
/*
* Parse metadata if present and fetch parameters. Must be done
Index: sys/powerpc/powerpc/mp_machdep.c
===================================================================
--- sys/powerpc/powerpc/mp_machdep.c
+++ sys/powerpc/powerpc/mp_machdep.c
@@ -69,6 +69,88 @@
static struct mtx ap_boot_mtx;
struct pcb stoppcbs[MAXCPU];
+#if defined(AIM)
+// Part of: Attempt a better-than-historical approximately
+// equal timebase value for ap vs. bsp
+
+volatile int alternate_timebase_sync_style= 0; // 0 indicates old style; 1 indicates new style.
+volatile uint64_t bsp_timebase_sample= 0u;
+
+volatile unsigned int from_bsp_status_flag= 0u;
+// stages: 0u, 1u (bsp ready to start), 2u (bsp tbr value available to ap)
+
+volatile unsigned int from_ap_status_flag= 0u;
+// stages: 0u, 1u (ap ready for bsp tbr value to be found and sent)
+#endif
+
+static __inline uint64_t
+mftb_with_no_pointer_use(void)
+{
+#ifdef __powerpc64__
+ uint64_t tb; // not used for 32-bit powerpc
+ __asm __volatile ("mftb %0" : "=r"(tb));
+ return tb;
+#else
+ uint32_t tbu; // not pointer into tb
+ uint32_t tbl; // not pointer into tb
+
+ do {
+ tbu= mfspr(TBR_TBU);
+ tbl= mfspr(TBR_TBL);
+ } while (tbu != mfspr(TBR_TBU));
+
+ // The construction of the uint64_t value does bias the mttb some
+ // for the round-trip-start side of things.
+ //
+ // The pointers into tb technique would involve a pair of memory
+ // writes and a pair of memory reads instead, the writes being
+ // in the loop.
+ return ((uint64_t)tbu<<32) | tbl;
+#endif
+}
+
+static __inline uint64_t
+mftb_plus_delta(volatile uint64_t* bsp_tbr, int64_t ap_midpoint)
+ // The return value is used in the mttb as the argument.
+{
+#ifdef __powerpc64__
+ uint64_t tb; // not used for 32-bit powerpc
+ __asm __volatile ("mftb %0" : "=r"(tb));
+ // The construction of the uint64_t value does bias the mttb some:
+ // it assigns an earlier time than hoped for, given these later
+ // calculations.
+ return tb + ((int64_t)*bsp_tbr - ap_midpoint);
+#else
+ // Establishes delta_for_approx_match_to_bsp_tbr_values such that:
+ // ap_midpoint+delta_for_approx_match_to_bsp_tbr_values==*bsp_tbr
+ int64_t delta_for_approx_match_to_bsp_tbr_values;
+ uint32_t tbu; // not pointer into tb
+ uint32_t tbl; // not pointer into tb
+
+ do {
+ // The below in-loop style is for avoiding the loop
+ // vs. ap_midpoint's calculation being reversed in
+ // the code generated: volatile is being put to
+ // use here.
+ delta_for_approx_match_to_bsp_tbr_values= (int64_t)*bsp_tbr-ap_midpoint;
+
+ tbu= mfspr(TBR_TBU);
+ tbl= mfspr(TBR_TBL);
+ } while (tbu != mfspr(TBR_TBU));
+
+ // The construction of the uint64_t value does bias the mttb some:
+ // it assigns an earlier time than hoped for, given these later
+ // calculations. Easily observable on the example 7455 based PowerMac
+ // G4. (Faster than G5 tbr increment rate but a slower processor.)
+ // But the overall process is still an improvement.
+ //
+ // The pointers into tb technique would involve a pair of memory
+ // writes and a pair of memory reads instead, the writes being
+ // in the loop. The "+ . . ." would still be involved.
+ return ( ((uint64_t)tbu<<32) | tbl ) + delta_for_approx_match_to_bsp_tbr_values;
+#endif
+}
+
void
machdep_ap_bootstrap(void)
{
@@ -76,19 +158,76 @@
PCPU_SET(awake, 1);
__asm __volatile("msync; isync");
+#if defined(AIM)
+ powerpc_sync();
+ isync();
+ if (1==alternate_timebase_sync_style)
+ {
+ // Part of: Attempt a better-than-historical approximately
+ // equal timebase value for ap vs. bsp
+
+ // No claim to deal with overflow/wraparound of tbr, or even
+ // of the upper bit being on.
+
+ register_t oldmsr= intr_disable();
+
+ while (1u!=from_bsp_status_flag)
+ ; // spin waiting for bsp to flag that its ready to start.
+
+ // Start to measure a round trip: to the bsp and back.
+
+ isync(); // Be sure below mftb() result is not from earlier speculative execution.
+ uint64_t const start_round_trip_time_on_ap= mftb_with_no_pointer_use();
+ atomic_store_rel_int(&from_ap_status_flag, 1u); // bsp waits for such before its mftb().
+
+ while (2u!=from_bsp_status_flag)
+ ; // spin waiting for bsp's tbr value
+
+ // Mid-point of ap round trip and the bsp timebase value should be approximately equal
+ // when the tbr's are well matched, absent interruptions on both sides.
+
+ isync(); // Be sure below mftb() result is not from earlier speculative execution.
+ uint64_t const end_round_trip_time_on_ap= mftb_with_no_pointer_use();
+ isync(); // Be sure above mftb() result is not from overlapping with the following.
+
+ int64_t const approx_round_trip_tbr_delta_on_ap
+ = (int64_t)end_round_trip_time_on_ap - (int64_t)start_round_trip_time_on_ap;
+ int64_t const ap_midpoint_value
+ = (int64_t)start_round_trip_time_on_ap + approx_round_trip_tbr_delta_on_ap/2;
+
+ // The mftb_plus_delta use is for helping to control the code order relative
+ // to tbr access. Such issues are notable for the 7455 based 2-socket PowerMacs,
+ // for example. Faster tbr increment rate than the G5's but slower processors
+ // and such. Still, overall this definitely helps such contexts compared to the
+ // historical style of timebase synchronization.
+ isync(); // Be sure below mftb() result is not from earlier speculative execution.
+ mttb(mftb_plus_delta(&bsp_timebase_sample,ap_midpoint_value));
+
+ atomic_store_rel_int(&from_bsp_status_flag, 0u); // Get ready for next ap in bsp loop
+ atomic_store_rel_int(&from_ap_status_flag, 0u); // Flag bsp that this ap is done
+
+ mtmsr(oldmsr);
+ }
+#endif
+
while (ap_letgo == 0)
nop_prio_vlow();
nop_prio_medium();
- /*
- * Set timebase as soon as possible to meet an implicit rendezvous
- * from cpu_mp_unleash(), which sets ap_letgo and then immediately
- * sets timebase.
- *
- * Note that this is instrinsically racy and is only relevant on
- * platforms that do not support better mechanisms.
- */
- platform_smp_timebase_sync(ap_timebase, 1);
+#if defined(AIM)
+ if (0==alternate_timebase_sync_style)
+#endif
+ {
+ /*
+ * Set timebase as soon as possible to meet an implicit rendezvous
+ * from cpu_mp_unleash(), which sets ap_letgo and then immediately
+ * sets timebase.
+ *
+ * Note that this is intrinsically racy and is only relevant on
+ * platforms that do not support better mechanisms.
+ */
+ platform_smp_timebase_sync(ap_timebase, 1);
+ }
/* Give platform code a chance to do anything else necessary */
platform_smp_ap_init();
@@ -261,20 +400,56 @@
pc->pc_cpuid, (uintmax_t)pc->pc_hwref,
pc->pc_awake);
smp_cpus++;
+
+#if defined(AIM)
+ // Part of: Attempt a better-than-historical approximately
+ // equal timebase value for ap vs. bsp
+ powerpc_sync();
+ isync();
+ if (1==alternate_timebase_sync_style)
+ {
+ register_t oldmsr= intr_disable();
+
+ atomic_store_rel_int(&from_bsp_status_flag, 1u); // bsp ready to start.
+
+ while (1u!=from_ap_status_flag)
+ ; // spin waiting for ap to flag: time to send a tbr.
+
+ isync(); // Be sure below mftb() result is not from earlier.
+ bsp_timebase_sample= mftb_with_no_pointer_use();
+ atomic_store_rel_int(&from_bsp_status_flag, 2u); // bsp tbr available.
+
+ // Most of the rest of the usage is in machdep_ap_bootstrap,
+ // other than controlling alternate_timebase_sync_style value.
+
+ while (0u!=from_ap_status_flag)
+ ; // spin waiting for ap to be done with the sample.
+
+ mtmsr(oldmsr);
+ }
+#endif
} else
CPU_SET(pc->pc_cpuid, &stopped_cpus);
}
ap_awake = 1;
- /* Provide our current DEC and TB values for APs */
- ap_timebase = mftb() + 10;
- __asm __volatile("msync; isync");
+#if defined(AIM)
+ if (0==alternate_timebase_sync_style)
+#endif
+ {
+ /* Provide our current DEC and TB values for APs */
+ ap_timebase = mftb() + 10;
+ __asm __volatile("msync; isync");
+ }
/* Let APs continue */
atomic_store_rel_int(&ap_letgo, 1);
- platform_smp_timebase_sync(ap_timebase, 0);
+#if defined(AIM)
+ if (0==alternate_timebase_sync_style)
+#endif
+ platform_smp_timebase_sync(ap_timebase, 0);
while (ap_awake < smp_cpus)
;
File Metadata
Details
Attached
Mime Type
text/plain
Expires
Tue, Apr 21, 2:11 PM (12 h, 6 m)
Storage Engine
blob
Storage Format
Raw Data
Storage Handle
31914840
Default Alt Text
D23376.id67333.diff (10 KB)
Attached To
Mode
D23376: Avoid having PowerMacs ending up with stuck-sleeping threads: force some boot-time TB value relationships across sockets/cores.
Attached
Detach File
Event Timeline
Log In to Comment