diff --git a/opal/include/opal/sys/amd64/timer.h b/opal/include/opal/sys/amd64/timer.h
index 56b4e542955..a367f772e75 100644
--- a/opal/include/opal/sys/amd64/timer.h
+++ b/opal/include/opal/sys/amd64/timer.h
@@ -31,31 +31,14 @@ typedef uint64_t opal_timer_t;
 
 #if OPAL_GCC_INLINE_ASSEMBLY
 
-/**
- * http://www.intel.com/content/www/us/en/intelligent-systems/embedded-systems-training/ia-32-ia-64-benchmark-code-execution-paper.html
- */
+/* Returns the 64-bit TSC read via lfence + rdtsc (EAX = low half, EDX = high half). TODO: add AMD mfence version and dispatch at init */
 static inline opal_timer_t
 opal_sys_timer_get_cycles(void)
 {
-    unsigned l, h;
-#if !OPAL_ASSEMBLY_SUPPORTS_RDTSCP
-    __asm__ __volatile__ ("cpuid\n\t"
+    uint32_t l, h;
+    __asm__ __volatile__ ("lfence\n\t"
                           "rdtsc\n\t"
-                          : "=a" (l), "=d" (h)
-                          :: "rbx", "rcx");
-#else
-    /* If we need higher accuracy we should implement the algorithm proposed
-     * on the Intel document referenced above. However, in the context of MPI
-     * this function will be used as the backend for MPI_Wtime and as such
-     * can afford a small inaccuracy.
-     */
-    __asm__ __volatile__ ("rdtscp\n\t"
-                          "mov %%edx, %0\n\t"
-                          "mov %%eax, %1\n\t"
-                          "cpuid\n\t"
-                          : "=r" (h), "=r" (l)
-                          :: "rax", "rbx", "rcx", "rdx");
-#endif
+                          : "=a" (l), "=d" (h));
     return ((opal_timer_t)l) | (((opal_timer_t)h) << 32);
 }
 