Prefetch paint struct bounds (#21894)

With the `benchspritesort` benchmark restored, this yields around 5% better results.
This commit is contained in:
Michał Janiszewski 2024-04-25 16:14:17 +02:00 committed by GitHub
parent 0e1e93e898
commit 49b28f1757
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 37 additions and 0 deletions

View File

@ -567,6 +567,7 @@
<ClInclude Include="ui\UiContext.h" />
<ClInclude Include="ui\WindowManager.h" />
<ClInclude Include="util\Math.hpp" />
<ClInclude Include="util\Prefetch.h" />
<ClInclude Include="util\SawyerCoding.h" />
<ClInclude Include="util\Util.h" />
<ClInclude Include="Version.h" />

View File

@ -20,6 +20,7 @@
#include "../paint/Painter.h"
#include "../profiling/Profiling.h"
#include "../util/Math.hpp"
#include "../util/Prefetch.h"
#include "Boundbox.h"
#include "Paint.Entity.h"
#include "tile_element/Paint.TileElement.h"
@ -391,6 +392,7 @@ template<uint8_t TRotation> static void PaintStructsSortQuadrant(PaintStruct* pa
auto* ps = child;
child = child->NextQuadrantEntry;
PREFETCH(&child->Bounds);
if (child == nullptr || child->SortFlags & PaintSortFlags::OutsideQuadrant)
{
break;

View File

@ -0,0 +1,34 @@
/*****************************************************************************
* Copyright (c) 2014-2024 OpenRCT2 developers
*
* For a complete list of all authors, please refer to contributors.md
* Interested in contributing? Visit https://github.com/OpenRCT2/OpenRCT2
*
* OpenRCT2 is licensed under the GNU General Public License version 3.
*****************************************************************************/
#pragma once

// PREFETCH(x) issues a best-effort cache prefetch hint for the memory at
// address `x` (any object pointer). It expands to a void expression on every
// platform, so `PREFETCH(ptr);` is always a valid statement. It is purely a
// performance hint: code must never rely on it for correctness, and on
// platforms without a known prefetch primitive it is a no-op.
//
// This cannot be expressed as a `constexpr` function, hence each definition is
// excluded from the clang-tidy macro-usage check.
#if defined(__amd64__) || defined(_M_AMD64) || defined(__i386__) || defined(_M_IX86)
// x86 / x86-64: don't bother checking for CPUID, prefetch is available since Pentium 4.
# include <xmmintrin.h>
// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
# define PREFETCH(x) _mm_prefetch(reinterpret_cast<const char*>(x), _MM_HINT_T0)
#elif defined(_MSC_VER) && defined(_M_ARM64)
// MSVC on ARM64: prefetch is available since ARMv8.
// MSVC's documentation (https://learn.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics?view=msvc-170)
// is incorrect: it points to a different header (arm64_neon.h), which does not declare __prefetch.
// The correct header was determined from
// https://github.com/microsoft/wdkmetadata/blob/1ac0dd0719f19196334de12cf2a6dec20316d440/generation/WDK/IdlHeaders/km/crt/intrin.h#L2
// and
// https://github.com/microsoft/wdkmetadata/blob/1ac0dd0719f19196334de12cf2a6dec20316d440/generation/WDK/IdlHeaders/km/crt/intrin.h#L411
# include <intrin.h>
// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
# define PREFETCH(x) __prefetch(x)
#elif defined(__GNUC__)
// Other architectures with a GNU-compatible compiler: let the compiler pick the prefetch instruction.
// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
# define PREFETCH(x) __builtin_prefetch(x)
#else
// Unknown platform: no prefetch primitive available. Expand to a void no-op expression
// (the argument is intentionally not evaluated) so the macro remains usable wherever
// the other definitions are.
// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
# define PREFETCH(x) static_cast<void>(0)
#endif