Merge pull request #4729 from xavery/bitcount-use-cpu

Use POPCNT, if available, to count the number of set bits
This commit is contained in:
Ted John 2016-11-01 13:26:39 +00:00 committed by GitHub
commit b568f002d4
6 changed files with 85 additions and 9 deletions

View File

@ -196,6 +196,9 @@ bool platform_check_steam_overlay_attached();
datetime64 platform_get_datetime_now_utc();
// Called very early in the program before parsing commandline arguments.
void core_init();
// Windows specific definitions
#ifdef __WINDOWS__
#ifndef WIN32_LEAN_AND_MEAN

View File

@ -51,6 +51,8 @@ utf8 _openrctDataDirectoryPath[MAX_PATH] = { 0 };
*/
int main(int argc, const char **argv)
{
core_init();
int run_game = cmdline_run(argv, argc);
if (run_game == 1)
{

View File

@ -782,3 +782,8 @@ uint8 platform_get_currency_value(const char *currCode) {
return CURRENCY_POUNDS;
}
// Early start-up hook. At present its only job is to call bitcount_init() so
// that the bitcount implementation is set up before anything uses it.
void core_init()
{
bitcount_init();
}

View File

@ -61,6 +61,8 @@ int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance, LPSTR lpCmdLine
{
_dllModule = hInstance;
core_init();
int argc;
char ** argv = (char**)windows_get_command_line_args(&argc);
int runGame = cmdline_run((const char **)argv, argc);
@ -86,6 +88,8 @@ int main(int argc, char *argv[])
HINSTANCE hInstance = GetModuleHandle(NULL);
_dllModule = hInstance;
core_init();
int runGame = cmdline_run((const char **)argv, argc);
if (runGame == 1) {
openrct2_launch();
@ -124,6 +128,8 @@ __declspec(dllexport) int StartOpenRCT(HINSTANCE hInstance, HINSTANCE hPrevInsta
_dllModule = GetModuleHandleA(OPENRCT2_DLL_MODULE_NAME);
}
core_init();
// argv = CommandLineToArgvA(lpCmdLine, &argc);
argv = (char**)windows_get_command_line_args(&argc);
runGame = cmdline_run((const char **)argv, argc);

View File

@ -195,15 +195,74 @@ int bitscanforward(int source)
#endif
}
int bitcount(int source)
#if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
#include <cpuid.h>
#define OpenRCT2_POPCNT_GNUC
#elif defined(_MSC_VER) && (_MSC_VER >= 1500) && (defined(_M_X64) || defined(_M_IX86)) // VS2008
#include <nmmintrin.h>
#define OpenRCT2_POPCNT_MSVC
#endif
// Reports whether the CPU advertises the POPCNT instruction, as read from
// CPUID leaf 1. Returns false when no CPUID access path was compiled in.
//
// NOTE(review): this text appears to be a rendered diff with its +/- markers
// stripped. The `result` loop and `return result;` immediately below look like
// the removed body of the previous bitcount() implementation rather than part
// of this function - confirm against the real file before relying on them.
static bool bitcount_popcnt_available()
{
int result = 0;
for (int i = 0; i < 32; i++) {
if (source & (1u << i)) {
result++;
}
}
return result;
// POPCNT support is reported in bit 23 (zero-based) of ECX after executing
// CPUID with EAX = 1.
#if defined(OpenRCT2_POPCNT_GNUC)
// we could use __builtin_cpu_supports, but it requires runtime support from
// the compiler's library, which clang doesn't have yet.
// ecx starts at 0, so if __get_cpuid leaves it untouched (its return value is
// not checked here) the function reports that POPCNT is unavailable.
unsigned int eax, ebx, ecx = 0, edx; // avoid "maybe uninitialized"
__get_cpuid(1, &eax, &ebx, &ecx, &edx);
return (ecx & (1 << 23));
#elif defined(OpenRCT2_POPCNT_MSVC)
// regs[] receives EAX, EBX, ECX, EDX in that order - regs[2] is ECX
// (presumably per the MSVC __cpuid intrinsic contract; verify).
int regs[4];
__cpuid(regs, 1);
return (regs[2] & (1 << 23));
#else
return false;
#endif
}
// Counts the set bits of source with the CPU's POPCNT instruction. When neither
// the GCC-style nor the MSVC code path was compiled in, this asserts and
// returns INT_MAX instead.
static int bitcount_popcnt(uint32 source)
{
#if defined(OpenRCT2_POPCNT_GNUC)
    // Inline asm is used so that the instruction itself is emitted; per the
    // original author, __builtin_popcount ends up as a call to a library
    // function instead.
    int numBitsSet;
    asm volatile ("popcnt %1,%0" : "=r"(numBitsSet) : "rm"(source) : "cc");
    return numBitsSet;
#elif defined(OpenRCT2_POPCNT_MSVC)
    return _mm_popcnt_u32(source);
#else
    // No hardware path available in this build: reaching here is a logic error.
    openrct2_assert(false, "bitcount_popcnt() called, without support compiled in");
    return INT_MAX;
#endif
}
// Portable bit counter: looks up the population count of each of the four
// bytes of source in a 256-entry table and adds them together.
static int bitcount_lut(uint32 source)
{
    // Table of per-byte bit counts, generated at compile time. Technique from
    // https://graphics.stanford.edu/~seander/bithacks.html
    static const unsigned char BitsSetTable256[256] =
    {
#define B2(n) n, n+1, n+1, n+2
#define B4(n) B2(n), B2(n+1), B2(n+1), B2(n+2)
#define B6(n) B4(n), B4(n+1), B4(n+1), B4(n+2)
        B6(0), B6(1), B6(1), B6(2)
    };

    int total = 0;
    // Walk the value one byte at a time, lowest byte first.
    for (int shift = 0; shift < 32; shift += 8) {
        total += BitsSetTable256[(source >> shift) & 0xff];
    }
    return total;
}
static int(*bitcount_fn)(uint32);
void bitcount_init()
{
bitcount_fn = bitcount_popcnt_available() ? bitcount_popcnt : bitcount_lut;
}
int bitcount(uint32 source)
{
return bitcount_fn(source);
}
bool strequals(const char *a, const char *b, int length, bool caseInsensitive)

View File

@ -38,7 +38,8 @@ void path_end_with_separator(utf8 *path, size_t size);
bool readentirefile(const utf8 *path, void **outBuffer, size_t *outLength);
int bitscanforward(int source);
int bitcount(int source);
void bitcount_init();
int bitcount(uint32 source);
bool strequals(const char *a, const char *b, int length, bool caseInsensitive);
int strcicmp(char const *a, char const *b);
int strlogicalcmp(char const *a, char const *b);