mirror of https://github.com/OpenRCT2/OpenRCT2.git
Merge pull request #4729 from xavery/bitcount-use-cpu
Use POPCNT, if available, to count the number of set bits
This commit is contained in:
commit
b568f002d4
|
@ -196,6 +196,9 @@ bool platform_check_steam_overlay_attached();
|
|||
|
||||
datetime64 platform_get_datetime_now_utc();
|
||||
|
||||
// Called very early in the program before parsing commandline arguments.
|
||||
void core_init();
|
||||
|
||||
// Windows specific definitions
|
||||
#ifdef __WINDOWS__
|
||||
#ifndef WIN32_LEAN_AND_MEAN
|
||||
|
|
|
@ -51,6 +51,8 @@ utf8 _openrctDataDirectoryPath[MAX_PATH] = { 0 };
|
|||
*/
|
||||
int main(int argc, const char **argv)
|
||||
{
|
||||
core_init();
|
||||
|
||||
int run_game = cmdline_run(argv, argc);
|
||||
if (run_game == 1)
|
||||
{
|
||||
|
|
|
@ -782,3 +782,8 @@ uint8 platform_get_currency_value(const char *currCode) {
|
|||
|
||||
return CURRENCY_POUNDS;
|
||||
}
|
||||
|
||||
/**
 * One-time, platform-level start-up hook.
 *
 * Called very early in the program, before the command line is parsed.
 * Currently it only selects the bit-counting implementation by calling
 * bitcount_init().
 */
void core_init(void)
{
    bitcount_init();
}
|
||||
|
|
|
@ -61,6 +61,8 @@ int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLine
|
|||
{
|
||||
_dllModule = hInstance;
|
||||
|
||||
core_init();
|
||||
|
||||
int argc;
|
||||
char ** argv = (char**)windows_get_command_line_args(&argc);
|
||||
int runGame = cmdline_run((const char **)argv, argc);
|
||||
|
@ -86,6 +88,8 @@ int main(int argc, char *argv[])
|
|||
HINSTANCE hInstance = GetModuleHandle(NULL);
|
||||
_dllModule = hInstance;
|
||||
|
||||
core_init();
|
||||
|
||||
int runGame = cmdline_run((const char **)argv, argc);
|
||||
if (runGame == 1) {
|
||||
openrct2_launch();
|
||||
|
@ -124,6 +128,8 @@ __declspec(dllexport) int StartOpenRCT(HINSTANCE hInstance, HINSTANCE hPrevInsta
|
|||
_dllModule = GetModuleHandleA(OPENRCT2_DLL_MODULE_NAME);
|
||||
}
|
||||
|
||||
core_init();
|
||||
|
||||
// argv = CommandLineToArgvA(lpCmdLine, &argc);
|
||||
argv = (char**)windows_get_command_line_args(&argc);
|
||||
runGame = cmdline_run((const char **)argv, argc);
|
||||
|
|
|
@ -195,15 +195,74 @@ int bitscanforward(int source)
|
|||
#endif
|
||||
}
|
||||
|
||||
int bitcount(int source)
|
||||
#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
|
||||
#include <cpuid.h>
|
||||
#define OpenRCT2_POPCNT_GNUC
|
||||
#elif defined(_MSC_VER) && (_MSC_VER >= 1500) && (defined(_M_X64) || defined(_M_IX86)) // VS2008
|
||||
#include <nmmintrin.h>
|
||||
#define OpenRCT2_POPCNT_MSVC
|
||||
#endif
|
||||
|
||||
/**
 * Reports whether the POPCNT instruction may be used on the running CPU.
 *
 * The answer comes from CPUID leaf 1 when one of the OpenRCT2_POPCNT_*
 * macros is defined; on any other compiler/architecture combination the
 * function always returns false.
 *
 * @return true if CPUID advertises POPCNT, false otherwise.
 */
static bool bitcount_popcnt_available(void)
{
    // POPCNT support is declared as the 23rd bit of ECX with CPUID(EAX = 1).
#if defined(OpenRCT2_POPCNT_GNUC)
    // we could use __builtin_cpu_supports, but it requires runtime support from
    // the compiler's library, which clang doesn't have yet.
    unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;
    // __get_cpuid() returns 0 when the requested leaf is not supported; in
    // that case the output registers carry no information.
    if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx)) {
        return false;
    }
    // Compare against zero so the result is a proper boolean even if `bool`
    // is narrower than the masked value.
    return (ecx & (1u << 23)) != 0;
#elif defined(OpenRCT2_POPCNT_MSVC)
    int regs[4] = { 0 };
    __cpuid(regs, 1);
    return (regs[2] & (1 << 23)) != 0;
#else
    return false;
#endif
}
|
||||
|
||||
/**
 * Counts the set bits of `source` with the hardware POPCNT instruction.
 *
 * Only meant to be reached after bitcount_popcnt_available() has reported
 * support. When no POPCNT back end was compiled in, the function asserts
 * and returns INT_MAX.
 *
 * @param source Value whose set bits are counted.
 * @return Number of set bits in `source` (INT_MAX in the unsupported build).
 */
static int bitcount_popcnt(uint32 source)
{
#if defined(OpenRCT2_POPCNT_GNUC)
    // use asm directly in order to actually emit the instruction : using
    // __builtin_popcount results in an extra call to a library function.
    // The __asm__/__volatile__ spellings are used because the plain `asm`
    // keyword is not recognised in strict ISO modes (-std=c99, -std=c11).
    int rv;
    __asm__ __volatile__ ("popcnt %1,%0" : "=r"(rv) : "rm"(source) : "cc");
    return rv;
#elif defined(OpenRCT2_POPCNT_MSVC)
    return _mm_popcnt_u32(source);
#else
    openrct2_assert(false, "bitcount_popcnt() called, without support compiled in");
    return INT_MAX;
#endif
}
|
||||
|
||||
/**
 * Portable bit counter: sums the set-bit counts of the four bytes of
 * `source` using a 256-entry lookup table.
 *
 * @param source Value whose set bits are counted.
 * @return Number of set bits in `source`.
 */
static int bitcount_lut(uint32 source)
{
    // https://graphics.stanford.edu/~seander/bithacks.html
    // The B2/B4/B6 helpers expand to the 256 table entries; each level
    // repeats the previous one four times with +0, +1, +1, +2 added.
    static const unsigned char BitsSetTable256[256] =
    {
#define B2(n) n,     n+1,     n+1,     n+2
#define B4(n) B2(n), B2(n+1), B2(n+1), B2(n+2)
#define B6(n) B4(n), B4(n+1), B4(n+1), B4(n+2)
        B6(0), B6(1), B6(1), B6(2)
        // The generators are only needed for this initializer; undefine them
        // so they do not leak into the rest of the translation unit.
#undef B6
#undef B4
#undef B2
    };
    return BitsSetTable256[source & 0xff] +
        BitsSetTable256[(source >> 8) & 0xff] +
        BitsSetTable256[(source >> 16) & 0xff] +
        BitsSetTable256[source >> 24];
}
|
||||
|
||||
static int(*bitcount_fn)(uint32);
|
||||
|
||||
void bitcount_init()
|
||||
{
|
||||
bitcount_fn = bitcount_popcnt_available() ? bitcount_popcnt : bitcount_lut;
|
||||
}
|
||||
|
||||
/**
 * Returns the number of set bits in `source`.
 *
 * The work is delegated to whichever implementation bitcount_fn currently
 * points at (selected by bitcount_init()).
 */
int bitcount(uint32 source)
{
    int setBits = (*bitcount_fn)(source);
    return setBits;
}
|
||||
|
||||
bool strequals(const char *a, const char *b, int length, bool caseInsensitive)
|
||||
|
|
|
@ -38,7 +38,8 @@ void path_end_with_separator(utf8 *path, size_t size);
|
|||
bool readentirefile(const utf8 *path, void **outBuffer, size_t *outLength);
|
||||
|
||||
int bitscanforward(int source);
|
||||
int bitcount(int source);
|
||||
void bitcount_init();
|
||||
int bitcount(uint32 source);
|
||||
bool strequals(const char *a, const char *b, int length, bool caseInsensitive);
|
||||
int strcicmp(char const *a, char const *b);
|
||||
int strlogicalcmp(char const *a, char const *b);
|
||||
|
|
Loading…
Reference in New Issue