Skip to content

Commit

Permalink
amd64 timers: use lfence instead of cpuid for serialization
Browse files Browse the repository at this point in the history
Signed-off-by: Carlos Bederián <bc@famaf.unc.edu.ar>
  • Loading branch information
zzzoom committed Feb 4, 2017
1 parent 4009ba6 commit ccea3de
Showing 1 changed file with 4 additions and 21 deletions.
25 changes: 4 additions & 21 deletions opal/include/opal/sys/amd64/timer.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,31 +31,14 @@ typedef uint64_t opal_timer_t;

#if OPAL_GCC_INLINE_ASSEMBLY

/**
* http://www.intel.com/content/www/us/en/intelligent-systems/embedded-systems-training/ia-32-ia-64-benchmark-code-execution-paper.html
*/
/* TODO: add AMD mfence version and dispatch at init */
/**
 * Read the processor time-stamp counter.
 *
 * An lfence is issued immediately before rdtsc so the counter read is
 * ordered after preceding instructions; lfence replaces the cpuid
 * instruction previously used for serialization here.
 *
 * @return The 64-bit counter value, assembled from the low half
 *         returned in EAX and the high half returned in EDX.
 */
static inline opal_timer_t
opal_sys_timer_get_cycles(void)
{
    uint32_t l, h;

    /* The "=a"/"=d" output constraints bind l and h directly to EAX and
     * EDX, which are the only registers written by this sequence, so no
     * clobber list is required. */
    __asm__ __volatile__ ("lfence\n\t"
                          "rdtsc\n\t"
                          : "=a" (l), "=d" (h));

    /* Widen before shifting so the high half is not truncated. */
    return ((opal_timer_t)l) | (((opal_timer_t)h) << 32);
}

Expand Down

0 comments on commit ccea3de

Please sign in to comment.