From bc101f41511c676ea0ce4c4699b496d4210cd9b8 Mon Sep 17 00:00:00 2001 From: Daniel Kamil Kozar Date: Mon, 31 Oct 2016 00:13:19 +0100 Subject: [PATCH 1/5] Use POPCNT, if available, to count the number of set bits Replace the current implementation of bitcount() with one that uses the POPCNT instruction available in most newer CPUs. Also, replace the basic implementation with one based on a lookup table, which has much better performance than the old one. --- src/util/util.c | 69 ++++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 63 insertions(+), 6 deletions(-) diff --git a/src/util/util.c b/src/util/util.c index 2afe119704..e988d7ec5a 100644 --- a/src/util/util.c +++ b/src/util/util.c @@ -195,15 +195,72 @@ int bitscanforward(int source) #endif } +#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__)) + #include <cpuid.h> + #define OpenRCT2_POPCNT_GNUC +#elif defined(_MSC_VER) && (_MSC_VER >= 1500) && (defined(_M_X64) || defined(_M_IX86)) // VS2008 + #include <intrin.h> + #define OpenRCT2_POPCNT_MSVC +#endif + +static int bitcount_available(void) +{ + // POPCNT support is declared as the 23rd bit of ECX with CPUID(EAX = 1). + #if defined(OpenRCT2_POPCNT_GNUC) + // we could use __builtin_cpu_supports, but it requires runtime support from + // the compiler's library, which clang doesn't have yet. + unsigned int eax, ebx, ecx = 0, edx; // avoid "maybe uninitialized" + __get_cpuid(1, &eax, &ebx, &ecx, &edx); + return (ecx & (1 << 23)); + #elif defined(OpenRCT2_POPCNT_MSVC) + int regs[4]; + __cpuid(regs, 1); + return (regs[2] & (1 << 23)); + #else + return 0; + #endif +} + +static int bitcount_popcnt(int source) +{ + #if defined(OpenRCT2_POPCNT_GNUC) + // use asm directly in order to actually emit the instruction : using + // __builtin_popcount results in an extra call to a library function. 
+ int rv; + asm volatile ("popcnt %1,%0" : "=r"(rv) : "rm"(source) : "cc"); + return rv; + #elif defined(OpenRCT2_POPCNT_MSVC) + return __popcnt(source); + #else + assert(false && "bitcount_popcnt() called, without support compiled in"); + return INT_MAX; + #endif +} + +static int bitcount_lut(int source) +{ + // https://graphics.stanford.edu/~seander/bithacks.html + static const unsigned char BitsSetTable256[256] = + { + #define B2(n) n, n+1, n+1, n+2 + #define B4(n) B2(n), B2(n+1), B2(n+1), B2(n+2) + #define B6(n) B4(n), B4(n+1), B4(n+1), B4(n+2) + B6(0), B6(1), B6(1), B6(2) + }; + return BitsSetTable256[source & 0xff] + + BitsSetTable256[(source >> 8) & 0xff] + + BitsSetTable256[(source >> 16) & 0xff] + + BitsSetTable256[source >> 24]; +} + int bitcount(int source) { - int result = 0; - for (int i = 0; i < 32; i++) { - if (source & (1u << i)) { - result++; - } + static int(*bitcount_fn)(int); + if(bitcount_fn == 0) + { + bitcount_fn = bitcount_available() ? bitcount_popcnt : bitcount_lut; } - return result; + return bitcount_fn(source); } bool strequals(const char *a, const char *b, int length, bool caseInsensitive) From 46b6ff35a04650a2f44b078c4f3985210784f7c6 Mon Sep 17 00:00:00 2001 From: Daniel Kamil Kozar Date: Mon, 31 Oct 2016 00:45:30 +0100 Subject: [PATCH 2/5] Initialise the pointer to bitcount_fn in a new early initialisation function In order to avoid the overhead of checking whether the function pointer to bitcount's actual implementation has been initialised every time bitcount is called, initialise it at application startup. 
--- src/platform/platform.h | 3 +++ src/platform/posix.c | 2 ++ src/platform/shared.c | 5 +++++ src/platform/windows.c | 6 ++++++ src/util/util.c | 12 +++++++----- src/util/util.h | 1 + 6 files changed, 24 insertions(+), 5 deletions(-) diff --git a/src/platform/platform.h b/src/platform/platform.h index 0c249c370b..b127969ec8 100644 --- a/src/platform/platform.h +++ b/src/platform/platform.h @@ -196,6 +196,9 @@ bool platform_check_steam_overlay_attached(); datetime64 platform_get_datetime_now_utc(); +// Called very early in the program before parsing commandline arguments. +void core_init(); + // Windows specific definitions #ifdef __WINDOWS__ #ifndef WIN32_LEAN_AND_MEAN diff --git a/src/platform/posix.c b/src/platform/posix.c index c549f55a47..75673e3e6a 100644 --- a/src/platform/posix.c +++ b/src/platform/posix.c @@ -51,6 +51,8 @@ utf8 _openrctDataDirectoryPath[MAX_PATH] = { 0 }; */ int main(int argc, const char **argv) { + core_init(); + int run_game = cmdline_run(argv, argc); if (run_game == 1) { diff --git a/src/platform/shared.c b/src/platform/shared.c index 395afdc2f7..7e2db30792 100644 --- a/src/platform/shared.c +++ b/src/platform/shared.c @@ -782,3 +782,8 @@ uint8 platform_get_currency_value(const char *currCode) { return CURRENCY_POUNDS; } + +void core_init() +{ + bitcount_init(); +} diff --git a/src/platform/windows.c b/src/platform/windows.c index 82fe20673b..32fe6fb427 100644 --- a/src/platform/windows.c +++ b/src/platform/windows.c @@ -61,6 +61,8 @@ int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLine { _dllModule = hInstance; + core_init(); + int argc; char ** argv = (char**)windows_get_command_line_args(&argc); int runGame = cmdline_run((const char **)argv, argc); @@ -86,6 +88,8 @@ int main(int argc, char *argv[]) HINSTANCE hInstance = GetModuleHandle(NULL); _dllModule = hInstance; + core_init(); + int runGame = cmdline_run((const char **)argv, argc); if (runGame == 1) { openrct2_launch(); @@ -124,6 +128,8 @@ 
__declspec(dllexport) int StartOpenRCT(HINSTANCE hInstance, HINSTANCE hPrevInsta _dllModule = GetModuleHandleA(OPENRCT2_DLL_MODULE_NAME); } + core_init(); + // argv = CommandLineToArgvA(lpCmdLine, &argc); argv = (char**)windows_get_command_line_args(&argc); runGame = cmdline_run((const char **)argv, argc); diff --git a/src/util/util.c b/src/util/util.c index e988d7ec5a..1cb56a3511 100644 --- a/src/util/util.c +++ b/src/util/util.c @@ -253,13 +253,15 @@ static int bitcount_lut(int source) BitsSetTable256[source >> 24]; } +static int(*bitcount_fn)(int); + +void bitcount_init(void) +{ + bitcount_fn = bitcount_available() ? bitcount_popcnt : bitcount_lut; +} + int bitcount(int source) { - static int(*bitcount_fn)(int); - if(bitcount_fn == 0) - { - bitcount_fn = bitcount_available() ? bitcount_popcnt : bitcount_lut; - } return bitcount_fn(source); } diff --git a/src/util/util.h b/src/util/util.h index 261596ba3f..46e931b4f2 100644 --- a/src/util/util.h +++ b/src/util/util.h @@ -38,6 +38,7 @@ void path_end_with_separator(utf8 *path, size_t size); bool readentirefile(const utf8 *path, void **outBuffer, size_t *outLength); int bitscanforward(int source); +void bitcount_init(void); int bitcount(int source); bool strequals(const char *a, const char *b, int length, bool caseInsensitive); int strcicmp(char const *a, char const *b); From d7606c8fa3586026141f76011502c843a1474e4b Mon Sep 17 00:00:00 2001 From: Daniel Kamil Kozar Date: Mon, 31 Oct 2016 00:58:22 +0100 Subject: [PATCH 3/5] Remove void from argument lists in new bitcount_ functions Keep it in line with the rest of the C code. 
--- src/util/util.c | 6 +++--- src/util/util.h | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/util/util.c b/src/util/util.c index 1cb56a3511..5a81caccb8 100644 --- a/src/util/util.c +++ b/src/util/util.c @@ -203,7 +203,7 @@ int bitscanforward(int source) #define OpenRCT2_POPCNT_MSVC #endif -static int bitcount_available(void) +static int bitcount_popcnt_available() { // POPCNT support is declared as the 23rd bit of ECX with CPUID(EAX = 1). #if defined(OpenRCT2_POPCNT_GNUC) @@ -255,9 +255,9 @@ static int bitcount_lut(int source) static int(*bitcount_fn)(int); -void bitcount_init(void) +void bitcount_init() { - bitcount_fn = bitcount_available() ? bitcount_popcnt : bitcount_lut; + bitcount_fn = bitcount_popcnt_available() ? bitcount_popcnt : bitcount_lut; } int bitcount(int source) diff --git a/src/util/util.h b/src/util/util.h index 46e931b4f2..0132acf728 100644 --- a/src/util/util.h +++ b/src/util/util.h @@ -38,7 +38,7 @@ void path_end_with_separator(utf8 *path, size_t size); bool readentirefile(const utf8 *path, void **outBuffer, size_t *outLength); int bitscanforward(int source); -void bitcount_init(void); +void bitcount_init(); int bitcount(int source); bool strequals(const char *a, const char *b, int length, bool caseInsensitive); int strcicmp(char const *a, char const *b); From b68b6f731b5053c7ede572b2d9a013d1f9fe31e2 Mon Sep 17 00:00:00 2001 From: Daniel Kamil Kozar Date: Mon, 31 Oct 2016 01:06:18 +0100 Subject: [PATCH 4/5] Use bool instead of int for bitcount_popcnt_available's return value --- src/util/util.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/util/util.c b/src/util/util.c index 5a81caccb8..0c89f91d1d 100644 --- a/src/util/util.c +++ b/src/util/util.c @@ -203,7 +203,7 @@ int bitscanforward(int source) #define OpenRCT2_POPCNT_MSVC #endif -static int bitcount_popcnt_available() +static bool bitcount_popcnt_available() { // POPCNT support is declared as the 23rd bit of ECX with CPUID(EAX = 1). 
#if defined(OpenRCT2_POPCNT_GNUC) @@ -217,7 +217,7 @@ static int bitcount_popcnt_available() __cpuid(regs, 1); return (regs[2] & (1 << 23)); #else - return 0; + return false; #endif } From 55f1d3aac64ab7ae3ae593e046916a3f22f49fa6 Mon Sep 17 00:00:00 2001 From: Daniel Kamil Kozar Date: Tue, 1 Nov 2016 11:42:36 +0100 Subject: [PATCH 5/5] Fixes to the new bitcount implementation Use Intel-standardized _mm_popcnt_u32 instead of Microsoft-specific __popcnt, replace assert with openrct2_assert, replace bitcount's argument with uint32. --- src/util/util.c | 14 +++++++------- src/util/util.h | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/util/util.c b/src/util/util.c index 0c89f91d1d..ca79080d65 100644 --- a/src/util/util.c +++ b/src/util/util.c @@ -199,7 +199,7 @@ int bitscanforward(int source) #include <cpuid.h> #define OpenRCT2_POPCNT_GNUC #elif defined(_MSC_VER) && (_MSC_VER >= 1500) && (defined(_M_X64) || defined(_M_IX86)) // VS2008 - #include <intrin.h> + #include <nmmintrin.h> #define OpenRCT2_POPCNT_MSVC #endif @@ -221,7 +221,7 @@ static bool bitcount_popcnt_available() #endif } -static int bitcount_popcnt(int source) +static int bitcount_popcnt(uint32 source) { #if defined(OpenRCT2_POPCNT_GNUC) // use asm directly in order to actually emit the instruction : using @@ -230,14 +230,14 @@ static int bitcount_popcnt(int source) asm volatile ("popcnt %1,%0" : "=r"(rv) : "rm"(source) : "cc"); return rv; #elif defined(OpenRCT2_POPCNT_MSVC) - return __popcnt(source); + return _mm_popcnt_u32(source); #else - assert(false && "bitcount_popcnt() called, without support compiled in"); + openrct2_assert(false, "bitcount_popcnt() called, without support compiled in"); return INT_MAX; #endif } -static int bitcount_lut(int source) +static int bitcount_lut(uint32 source) { // https://graphics.stanford.edu/~seander/bithacks.html static const unsigned char BitsSetTable256[256] = @@ -253,14 +253,14 @@ static int bitcount_lut(int source) BitsSetTable256[source >> 24]; } -static 
int(*bitcount_fn)(int); +static int(*bitcount_fn)(uint32); void bitcount_init() { bitcount_fn = bitcount_popcnt_available() ? bitcount_popcnt : bitcount_lut; } -int bitcount(int source) +int bitcount(uint32 source) { return bitcount_fn(source); } diff --git a/src/util/util.h b/src/util/util.h index 0132acf728..f6ffdc66f5 100644 --- a/src/util/util.h +++ b/src/util/util.h @@ -39,7 +39,7 @@ bool readentirefile(const utf8 *path, void **outBuffer, size_t *outLength); int bitscanforward(int source); void bitcount_init(); -int bitcount(int source); +int bitcount(uint32 source); bool strequals(const char *a, const char *b, int length, bool caseInsensitive); int strcicmp(char const *a, char const *b); int strlogicalcmp(char const *a, char const *b);