mirror of https://github.com/OpenTTD/OpenTTD.git
Remove: rdtsc and TIC/TOC based on CPU ticks (#11856)
Use TIC/TOC based on std::chrono instead. The results are also easier to compare with others: although they still depend on the CPU, "yours takes 4ms and mine takes 10ms" means a bit more than raw CPU tick counts.
This commit is contained in:
parent
fe035c306e
commit
fa8294ebe7
80
src/cpu.cpp
80
src/cpu.cpp
|
@ -12,86 +12,6 @@
|
||||||
|
|
||||||
#include "safeguards.h"
|
#include "safeguards.h"
|
||||||
|
|
||||||
#undef RDTSC_AVAILABLE
|
|
||||||
|
|
||||||
/* rdtsc for MSC_VER, uses simple inline assembly, or _rdtsc
|
|
||||||
* from external win64.asm because VS2005 does not support inline assembly */
|
|
||||||
#if defined(_MSC_VER) && !defined(RDTSC_AVAILABLE)
|
|
||||||
#include <intrin.h>
|
|
||||||
#include <windows.h>
|
|
||||||
uint64_t ottd_rdtsc()
|
|
||||||
{
|
|
||||||
#if defined(_M_ARM)
|
|
||||||
return __rdpmccntr64();
|
|
||||||
#elif defined(_M_ARM64)
|
|
||||||
return _ReadStatusReg(ARM64_PMCCNTR_EL0);
|
|
||||||
#else
|
|
||||||
return __rdtsc();
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
#define RDTSC_AVAILABLE
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* rdtsc for all other *nix-en (hopefully). Use GCC syntax */
|
|
||||||
#if (defined(__i386__) || defined(__x86_64__)) && !defined(RDTSC_AVAILABLE)
|
|
||||||
uint64_t ottd_rdtsc()
|
|
||||||
{
|
|
||||||
uint32_t high, low;
|
|
||||||
__asm__ __volatile__ ("rdtsc" : "=a" (low), "=d" (high));
|
|
||||||
return ((uint64_t)high << 32) | low;
|
|
||||||
}
|
|
||||||
# define RDTSC_AVAILABLE
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* rdtsc equivalent for PPC, which does not have this instruction */
|
|
||||||
#if (defined(__POWERPC__) || defined(__powerpc__)) && !defined(RDTSC_AVAILABLE)
|
|
||||||
uint64_t ottd_rdtsc()
|
|
||||||
{
|
|
||||||
uint32_t high = 0, high2 = 0, low;
|
|
||||||
/* PPC does not have rdtsc, so we cheat by reading the two 32-bit time-counters
|
|
||||||
* it has, 'Move From Time Base (Upper)'. Since these are two reads, in the
|
|
||||||
* very unlikely event that the lower part overflows to the upper part while we
|
|
||||||
* read it, we double-check and reread the registers */
|
|
||||||
asm volatile (
|
|
||||||
"mftbu %0\n"
|
|
||||||
"mftb %1\n"
|
|
||||||
"mftbu %2\n"
|
|
||||||
"cmpw %3,%4\n"
|
|
||||||
"bne- $-16\n"
|
|
||||||
: "=r" (high), "=r" (low), "=r" (high2)
|
|
||||||
: "0" (high), "2" (high2)
|
|
||||||
);
|
|
||||||
return ((uint64_t)high << 32) | low;
|
|
||||||
}
|
|
||||||
# define RDTSC_AVAILABLE
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* rdtsc for MCST Elbrus 2000 */
|
|
||||||
#if defined(__e2k__) && !defined(RDTSC_AVAILABLE)
|
|
||||||
uint64_t ottd_rdtsc()
|
|
||||||
{
|
|
||||||
uint64_t dst;
|
|
||||||
# pragma asm_inline
|
|
||||||
asm("rrd %%clkr, %0" : "=r" (dst));
|
|
||||||
return dst;
|
|
||||||
}
|
|
||||||
# define RDTSC_AVAILABLE
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(__EMSCRIPTEN__) && !defined(RDTSC_AVAILABLE)
|
|
||||||
/* On emscripten doing TIC/TOC would be ill-advised */
|
|
||||||
uint64_t ottd_rdtsc() {return 0;}
|
|
||||||
# define RDTSC_AVAILABLE
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* In all other cases we have no support for rdtsc. No major issue,
|
|
||||||
* you just won't be able to profile your code with TIC()/TOC() */
|
|
||||||
#if !defined(RDTSC_AVAILABLE)
|
|
||||||
#warning "(non-fatal) No support for rdtsc(), you won't be able to profile with TIC/TOC"
|
|
||||||
uint64_t ottd_rdtsc() {return 0;}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Definitions for CPU detection:
|
* Definitions for CPU detection:
|
||||||
*
|
*
|
||||||
|
|
|
@ -10,12 +10,6 @@
|
||||||
#ifndef CPU_H
|
#ifndef CPU_H
|
||||||
#define CPU_H
|
#define CPU_H
|
||||||
|
|
||||||
/**
|
|
||||||
* Get the tick counter from the CPU (high precision timing).
|
|
||||||
* @return The count.
|
|
||||||
*/
|
|
||||||
uint64_t ottd_rdtsc();
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get the CPUID information from the CPU.
|
* Get the CPUID information from the CPU.
|
||||||
* @param info The retrieved info. All zeros on architectures without CPUID.
|
* @param info The retrieved info. All zeros on architectures without CPUID.
|
||||||
|
|
23
src/debug.h
23
src/debug.h
|
@ -63,7 +63,8 @@ std::string GetDebugString();
|
||||||
/* Shorter form for passing filename and linenumber */
|
/* Shorter form for passing filename and linenumber */
|
||||||
#define FILE_LINE __FILE__, __LINE__
|
#define FILE_LINE __FILE__, __LINE__
|
||||||
|
|
||||||
/* Used for profiling
|
/**
|
||||||
|
* Used for profiling.
|
||||||
*
|
*
|
||||||
* Usage:
|
* Usage:
|
||||||
* TIC();
|
* TIC();
|
||||||
|
@ -84,30 +85,14 @@ std::string GetDebugString();
|
||||||
* TIC() / TOC() creates its own block, so make sure not to mangle
|
* TIC() / TOC() creates its own block, so make sure not to mangle
|
||||||
* it with another block.
|
* it with another block.
|
||||||
*
|
*
|
||||||
* The output is counted in CPU cycles, and not comparable across
|
* The output is counted in microseconds. Mainly useful for local optimisations.
|
||||||
* machines. Mainly useful for local optimisations.
|
|
||||||
**/
|
**/
|
||||||
#define TIC() {\
|
#define TIC() {\
|
||||||
uint64_t _xxx_ = ottd_rdtsc();\
|
|
||||||
static uint64_t _sum_ = 0;\
|
|
||||||
static uint32_t _i_ = 0;
|
|
||||||
|
|
||||||
#define TOC(str, count)\
|
|
||||||
_sum_ += ottd_rdtsc() - _xxx_;\
|
|
||||||
if (++_i_ == count) {\
|
|
||||||
Debug(misc, 0, "[{}] {} [avg: {:.1f}]", str, _sum_, _sum_/(double)_i_);\
|
|
||||||
_i_ = 0;\
|
|
||||||
_sum_ = 0;\
|
|
||||||
}\
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Chrono based version. The output is in microseconds. */
|
|
||||||
#define TICC() {\
|
|
||||||
auto _start_ = std::chrono::high_resolution_clock::now();\
|
auto _start_ = std::chrono::high_resolution_clock::now();\
|
||||||
static uint64_t _sum_ = 0;\
|
static uint64_t _sum_ = 0;\
|
||||||
static uint32_t _i_ = 0;
|
static uint32_t _i_ = 0;
|
||||||
|
|
||||||
#define TOCC(str, _count_)\
|
#define TOC(str, _count_)\
|
||||||
_sum_ += (std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - _start_)).count();\
|
_sum_ += (std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::high_resolution_clock::now() - _start_)).count();\
|
||||||
if (++_i_ == _count_) {\
|
if (++_i_ == _count_) {\
|
||||||
Debug(misc, 0, "[{}] {} us [avg: {:.1f} us]", str, _sum_, _sum_/(double)_i_);\
|
Debug(misc, 0, "[{}] {} us [avg: {:.1f} us]", str, _sum_, _sum_/(double)_i_);\
|
||||||
|
|
Loading…
Reference in New Issue