mirror of https://github.com/OpenRCT2/OpenRCT2.git
Optimise SawyerChunkReader for MSVC debug builds
- Change std::copy_n and std::fill_n back to std::memcpy and std::memset. They do not have the overhead of checks. - Change std::malloc to HeapAlloc as 16 MiB allocations are very slow due to it initialising all the memory to 0xCC.
This commit is contained in:
parent
3d98e1ad1d
commit
c28a42d877
|
@ -1,4 +1,4 @@
|
|||
#pragma region Copyright (c) 2014-2017 OpenRCT2 Developers
|
||||
#pragma region Copyright (c) 2014-2018 OpenRCT2 Developers
|
||||
/*****************************************************************************
|
||||
* OpenRCT2, an open source clone of Roller Coaster Tycoon 2.
|
||||
*
|
||||
|
@ -14,12 +14,17 @@
|
|||
*****************************************************************************/
|
||||
#pragma endregion
|
||||
|
||||
#include <algorithm>
|
||||
#include "../core/IStream.hpp"
|
||||
#include "../core/Math.hpp"
|
||||
#include "../core/Memory.hpp"
|
||||
#include "SawyerChunkReader.h"
|
||||
|
||||
// malloc is very slow for large allocations in MSVC debug builds as it allocates
|
||||
// memory on a special debug heap and then initialises all the memory to 0xCC.
|
||||
#if defined(_WIN32) && defined(DEBUG)
|
||||
#define __USE_HEAP_ALLOC__
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
// Allow chunks to be uncompressed to a maximum of 16 MiB
|
||||
constexpr size_t MAX_UNCOMPRESSED_CHUNK_SIZE = 16 * 1024 * 1024;
|
||||
|
||||
|
@ -74,22 +79,10 @@ std::shared_ptr<SawyerChunk> SawyerChunkReader::ReadChunk()
|
|||
throw SawyerChunkException(EXCEPTION_MSG_CORRUPT_CHUNK_SIZE);
|
||||
}
|
||||
|
||||
// Allow 16MiB for chunk data
|
||||
size_t bufferSize = MAX_UNCOMPRESSED_CHUNK_SIZE;
|
||||
uint8 * buffer = Memory::Allocate<uint8>(bufferSize);
|
||||
if (buffer == nullptr)
|
||||
{
|
||||
throw std::runtime_error("Unable to allocate buffer.");
|
||||
}
|
||||
|
||||
size_t uncompressedLength = DecodeChunk(buffer, bufferSize, compressedData.get(), header);
|
||||
auto buffer = (uint8 *)AllocateLargeTempBuffer();
|
||||
size_t uncompressedLength = DecodeChunk(buffer, MAX_UNCOMPRESSED_CHUNK_SIZE, compressedData.get(), header);
|
||||
Guard::Assert(uncompressedLength != 0, "Encountered zero-sized chunk!");
|
||||
buffer = Memory::Reallocate(buffer, uncompressedLength);
|
||||
if (buffer == nullptr)
|
||||
{
|
||||
throw std::runtime_error("Unable to reallocate buffer.");
|
||||
}
|
||||
|
||||
buffer = (uint8 *)FinaliseLargeTempBuffer(buffer, uncompressedLength);
|
||||
return std::make_shared<SawyerChunk>((SAWYER_ENCODING)header.encoding, buffer, uncompressedLength);
|
||||
}
|
||||
default:
|
||||
|
@ -111,16 +104,16 @@ void SawyerChunkReader::ReadChunk(void * dst, size_t length)
|
|||
auto chunkLength = chunk->GetLength();
|
||||
if (chunkLength > length)
|
||||
{
|
||||
std::copy_n(chunkData, length, (uint8 *)dst);
|
||||
std::memcpy(dst, chunkData, length);
|
||||
}
|
||||
else
|
||||
{
|
||||
std::copy_n(chunkData, chunkLength, (uint8 *)dst);
|
||||
std::memcpy(dst, chunkData, chunkLength);
|
||||
auto remainingLength = length - chunkLength;
|
||||
if (remainingLength > 0)
|
||||
{
|
||||
auto offset = (uint8 *)dst + chunkLength;
|
||||
std::fill_n(offset, remainingLength, 0);
|
||||
std::memset(offset, 0, remainingLength);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -135,7 +128,7 @@ size_t SawyerChunkReader::DecodeChunk(void * dst, size_t dstCapacity, const void
|
|||
{
|
||||
throw SawyerChunkException(EXCEPTION_MSG_DESTINATION_TOO_SMALL);
|
||||
}
|
||||
std::copy_n((const uint8 *)src, header.length, (uint8 *)dst);
|
||||
std::memcpy(dst, src, header.length);
|
||||
resultLength = header.length;
|
||||
break;
|
||||
case CHUNK_ENCODING_RLE:
|
||||
|
@ -155,10 +148,11 @@ size_t SawyerChunkReader::DecodeChunk(void * dst, size_t dstCapacity, const void
|
|||
|
||||
size_t SawyerChunkReader::DecodeChunkRLERepeat(void * dst, size_t dstCapacity, const void * src, size_t srcLength)
|
||||
{
|
||||
auto immBufferLength = MAX_UNCOMPRESSED_CHUNK_SIZE;
|
||||
auto immBuffer = std::make_unique<uint8[]>(immBufferLength);
|
||||
auto immLength = DecodeChunkRLE(immBuffer.get(), immBufferLength, src, srcLength);
|
||||
return DecodeChunkRepeat(dst, dstCapacity, immBuffer.get(), immLength);
|
||||
auto immBuffer = AllocateLargeTempBuffer();
|
||||
auto immLength = DecodeChunkRLE(immBuffer, MAX_UNCOMPRESSED_CHUNK_SIZE, src, srcLength);
|
||||
auto size = DecodeChunkRepeat(dst, dstCapacity, immBuffer, immLength);
|
||||
FreeLargeTempBuffer(immBuffer);
|
||||
return size;
|
||||
}
|
||||
|
||||
size_t SawyerChunkReader::DecodeChunkRLE(void * dst, size_t dstCapacity, const void * src, size_t srcLength)
|
||||
|
@ -183,7 +177,7 @@ size_t SawyerChunkReader::DecodeChunkRLE(void * dst, size_t dstCapacity, const v
|
|||
throw SawyerChunkException(EXCEPTION_MSG_DESTINATION_TOO_SMALL);
|
||||
}
|
||||
|
||||
std::fill_n(dst8, count, src8[i]);
|
||||
std::memset(dst8, src8[i], count);
|
||||
dst8 += count;
|
||||
}
|
||||
else
|
||||
|
@ -197,7 +191,7 @@ size_t SawyerChunkReader::DecodeChunkRLE(void * dst, size_t dstCapacity, const v
|
|||
throw SawyerChunkException(EXCEPTION_MSG_DESTINATION_TOO_SMALL);
|
||||
}
|
||||
|
||||
std::copy_n(src8 + i + 1, rleCodeByte + 1, dst8);
|
||||
std::memcpy(dst8, src8 + i + 1, rleCodeByte + 1);
|
||||
dst8 += rleCodeByte + 1;
|
||||
i += rleCodeByte + 1;
|
||||
}
|
||||
|
@ -226,7 +220,7 @@ size_t SawyerChunkReader::DecodeChunkRepeat(void * dst, size_t dstCapacity, cons
|
|||
throw SawyerChunkException(EXCEPTION_MSG_DESTINATION_TOO_SMALL);
|
||||
}
|
||||
|
||||
std::copy_n(copySrc, count, dst8);
|
||||
std::memcpy(dst8, copySrc, count);
|
||||
dst8 += count;
|
||||
}
|
||||
}
|
||||
|
@ -250,3 +244,42 @@ size_t SawyerChunkReader::DecodeChunkRotate(void * dst, size_t dstCapacity, cons
|
|||
}
|
||||
return srcLength;
|
||||
}
|
||||
|
||||
void * SawyerChunkReader::AllocateLargeTempBuffer()
|
||||
{
|
||||
#ifdef __USE_HEAP_ALLOC__
|
||||
auto buffer = HeapAlloc(GetProcessHeap(), 0, MAX_UNCOMPRESSED_CHUNK_SIZE);
|
||||
#else
|
||||
auto buffer = std::malloc(MAX_UNCOMPRESSED_CHUNK_SIZE);
|
||||
#endif
|
||||
if (buffer == nullptr)
|
||||
{
|
||||
throw std::runtime_error("Unable to allocate large temporary buffer.");
|
||||
}
|
||||
return buffer;
|
||||
}
|
||||
|
||||
void * SawyerChunkReader::FinaliseLargeTempBuffer(void * buffer, size_t len)
|
||||
{
|
||||
#ifdef __USE_HEAP_ALLOC__
|
||||
auto finalBuffer = std::malloc(len);
|
||||
std::memcpy(finalBuffer, buffer, len);
|
||||
HeapFree(GetProcessHeap(), 0, buffer);
|
||||
#else
|
||||
auto finalBuffer = (uint8 *)std::realloc(buffer, len);
|
||||
#endif
|
||||
if (finalBuffer == nullptr)
|
||||
{
|
||||
throw std::runtime_error("Unable to allocate final buffer.");
|
||||
}
|
||||
return finalBuffer;
|
||||
}
|
||||
|
||||
void SawyerChunkReader::FreeLargeTempBuffer(void * buffer)
|
||||
{
|
||||
#ifdef __USE_HEAP_ALLOC__
|
||||
HeapFree(GetProcessHeap(), 0, buffer);
|
||||
#else
|
||||
std::free(buffer);
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -75,4 +75,8 @@ private:
|
|||
static size_t DecodeChunkRLE(void * dst, size_t dstCapacity, const void * src, size_t srcLength);
|
||||
static size_t DecodeChunkRepeat(void * dst, size_t dstCapacity, const void * src, size_t srcLength);
|
||||
static size_t DecodeChunkRotate(void * dst, size_t dstCapacity, const void * src, size_t srcLength);
|
||||
|
||||
static void * AllocateLargeTempBuffer();
|
||||
static void * FinaliseLargeTempBuffer(void * buffer, size_t len);
|
||||
static void FreeLargeTempBuffer(void * buffer);
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue