@@ -127,6 +127,32 @@ static __always_inline unsigned long long rdtsc(void)
127127 return EAX_EDX_VAL (val , low , high );
128128}
129129
130+ /**
131+ * rdtsc_ordered() - read the current TSC in program order
132+ *
133+ * rdtsc_ordered() returns the result of RDTSC as a 64-bit integer.
134+ * It is ordered like a load to a global in-memory counter. It should
135+ * be impossible to observe non-monotonic rdtsc_ordered() behavior
136+ * across multiple CPUs as long as the TSC is synced.
137+ */
138+ static __always_inline unsigned long long rdtsc_ordered (void )
139+ {
140+ /*
141+ * The RDTSC instruction is not ordered relative to memory
142+ * access. The Intel SDM and the AMD APM are both vague on this
143+ * point, but empirically an RDTSC instruction can be
144+ * speculatively executed before prior loads. An RDTSC
145+ * immediately after an appropriate barrier appears to be
146+ * ordered as a normal load, that is, it provides the same
147+ * ordering guarantees as reading from a global memory location
148+ * that some other imaginary CPU is updating continuously with a
149+ * time stamp.
150+ */
151+ alternative_2 ("" , "mfence" , X86_FEATURE_MFENCE_RDTSC ,
152+ "lfence" , X86_FEATURE_LFENCE_RDTSC );
153+ return rdtsc ();
154+ }
155+
130156static inline unsigned long long native_read_pmc (int counter )
131157{
132158 DECLARE_ARGS (val , low , high );
0 commit comments