Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

v2.x: Improve x86-64 timer performance #3748

Merged
merged 2 commits into from
Jul 6, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 4 additions & 21 deletions opal/include/opal/sys/x86_64/timer.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,31 +31,14 @@ typedef uint64_t opal_timer_t;

#if OPAL_GCC_INLINE_ASSEMBLY

- /**
- * http://www.intel.com/content/www/us/en/intelligent-systems/embedded-systems-training/ia-32-ia-64-benchmark-code-execution-paper.html
- */
+ /* TODO: add AMD mfence version and dispatch at init */
  static inline opal_timer_t
  opal_sys_timer_get_cycles(void)
  {
- unsigned l, h;
- #if !OPAL_ASSEMBLY_SUPPORTS_RDTSCP
- __asm__ __volatile__ ("cpuid\n\t"
+ uint32_t l, h;
+ __asm__ __volatile__ ("lfence\n\t"
  "rdtsc\n\t"
- : "=a" (l), "=d" (h)
- :: "rbx", "rcx");
- #else
- /* If we need higher accuracy we should implement the algorithm proposed
- * on the Intel document referenced above. However, in the context of MPI
- * this function will be used as the backend for MPI_Wtime and as such
- * can afford a small inaccuracy.
- */
- __asm__ __volatile__ ("rdtscp\n\t"
- "mov %%edx, %0\n\t"
- "mov %%eax, %1\n\t"
- "cpuid\n\t"
- : "=r" (h), "=r" (l)
- :: "rax", "rbx", "rcx", "rdx");
- #endif
+ : "=a" (l), "=d" (h));
return ((opal_timer_t)l) | (((opal_timer_t)h) << 32);
}

Expand Down
9 changes: 5 additions & 4 deletions opal/runtime/opal_progress.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
#include "opal/runtime/opal_params.h"

#define OPAL_PROGRESS_USE_TIMERS (OPAL_TIMER_CYCLE_SUPPORTED || OPAL_TIMER_USEC_SUPPORTED)
+ #define OPAL_PROGRESS_ONLY_USEC_NATIVE (OPAL_TIMER_USEC_NATIVE && !OPAL_TIMER_CYCLE_NATIVE)

#if OPAL_ENABLE_DEBUG
bool opal_progress_debug = false;
Expand Down Expand Up @@ -193,11 +194,11 @@ opal_progress(void)
if( opal_progress_event_flag != 0 ) {
#if OPAL_HAVE_WORKING_EVENTOPS
#if OPAL_PROGRESS_USE_TIMERS
- #if OPAL_TIMER_USEC_NATIVE
+ #if OPAL_PROGRESS_ONLY_USEC_NATIVE
  opal_timer_t now = opal_timer_base_get_usec();
  #else
  opal_timer_t now = opal_timer_base_get_cycles();
- #endif /* OPAL_TIMER_USEC_NATIVE */
+ #endif /* OPAL_PROGRESS_ONLY_USEC_NATIVE */
/* trip the event library if we've reached our tick rate and we are
enabled */
if (now - event_progress_last_time > event_progress_delta ) {
Expand Down Expand Up @@ -320,7 +321,7 @@ opal_progress_set_event_poll_rate(int polltime)

#if OPAL_PROGRESS_USE_TIMERS
event_progress_delta = 0;
- # if OPAL_TIMER_USEC_NATIVE
+ # if OPAL_PROGRESS_ONLY_USEC_NATIVE
event_progress_last_time = opal_timer_base_get_usec();
# else
event_progress_last_time = opal_timer_base_get_cycles();
Expand All @@ -347,7 +348,7 @@ opal_progress_set_event_poll_rate(int polltime)
#endif
}

- #if OPAL_PROGRESS_USE_TIMERS && !OPAL_TIMER_USEC_NATIVE
+ #if OPAL_PROGRESS_USE_TIMERS && !OPAL_PROGRESS_ONLY_USEC_NATIVE
/* going to use cycles for counter. Adjust specified usec into cycles */
event_progress_delta = event_progress_delta * opal_timer_base_get_freq() / 1000000;
#endif
Expand Down