diff --git a/opal/include/opal/sys/x86_64/timer.h b/opal/include/opal/sys/x86_64/timer.h index 56b4e542955..a367f772e75 100644 --- a/opal/include/opal/sys/x86_64/timer.h +++ b/opal/include/opal/sys/x86_64/timer.h @@ -31,31 +31,14 @@ typedef uint64_t opal_timer_t; #if OPAL_GCC_INLINE_ASSEMBLY -/** - * http://www.intel.com/content/www/us/en/intelligent-systems/embedded-systems-training/ia-32-ia-64-benchmark-code-execution-paper.html - */ +/* TODO: add AMD mfence version and dispatch at init */ static inline opal_timer_t opal_sys_timer_get_cycles(void) { - unsigned l, h; -#if !OPAL_ASSEMBLY_SUPPORTS_RDTSCP - __asm__ __volatile__ ("cpuid\n\t" + uint32_t l, h; + __asm__ __volatile__ ("lfence\n\t" "rdtsc\n\t" - : "=a" (l), "=d" (h) - :: "rbx", "rcx"); -#else - /* If we need higher accuracy we should implement the algorithm proposed - * on the Intel document referenced above. However, in the context of MPI - * this function will be used as the backend for MPI_Wtime and as such - * can afford a small inaccuracy. - */ - __asm__ __volatile__ ("rdtscp\n\t" - "mov %%edx, %0\n\t" - "mov %%eax, %1\n\t" - "cpuid\n\t" - : "=r" (h), "=r" (l) - :: "rax", "rbx", "rcx", "rdx"); -#endif + : "=a" (l), "=d" (h)); return ((opal_timer_t)l) | (((opal_timer_t)h) << 32); } diff --git a/opal/runtime/opal_progress.c b/opal/runtime/opal_progress.c index d78402adc46..d0a50916463 100644 --- a/opal/runtime/opal_progress.c +++ b/opal/runtime/opal_progress.c @@ -37,6 +37,7 @@ #include "opal/runtime/opal_params.h" #define OPAL_PROGRESS_USE_TIMERS (OPAL_TIMER_CYCLE_SUPPORTED || OPAL_TIMER_USEC_SUPPORTED) +#define OPAL_PROGRESS_ONLY_USEC_NATIVE (OPAL_TIMER_USEC_NATIVE && !OPAL_TIMER_CYCLE_NATIVE) #if OPAL_ENABLE_DEBUG bool opal_progress_debug = false; @@ -193,11 +194,11 @@ opal_progress(void) if( opal_progress_event_flag != 0 ) { #if OPAL_HAVE_WORKING_EVENTOPS #if OPAL_PROGRESS_USE_TIMERS -#if OPAL_TIMER_USEC_NATIVE +#if OPAL_PROGRESS_ONLY_USEC_NATIVE opal_timer_t now = opal_timer_base_get_usec(); #else opal_timer_t now = opal_timer_base_get_cycles(); -#endif /* OPAL_TIMER_USEC_NATIVE */ +#endif /* OPAL_PROGRESS_ONLY_USEC_NATIVE */ /* trip the event library if we've reached our tick rate and we are enabled */ if (now - event_progress_last_time > event_progress_delta ) { @@ -320,7 +321,7 @@ opal_progress_set_event_poll_rate(int polltime) #if OPAL_PROGRESS_USE_TIMERS event_progress_delta = 0; -# if OPAL_TIMER_USEC_NATIVE +# if OPAL_PROGRESS_ONLY_USEC_NATIVE event_progress_last_time = opal_timer_base_get_usec(); # else event_progress_last_time = opal_timer_base_get_cycles(); @@ -347,7 +348,7 @@ opal_progress_set_event_poll_rate(int polltime) #endif } -#if OPAL_PROGRESS_USE_TIMERS && !OPAL_TIMER_USEC_NATIVE +#if OPAL_PROGRESS_USE_TIMERS && !OPAL_PROGRESS_ONLY_USEC_NATIVE /* going to use cycles for counter. Adjust specified usec into cycles */ event_progress_delta = event_progress_delta * opal_timer_base_get_freq() / 1000000; #endif