From 49b28f17578d47534dd31762d79751d430323b9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Janiszewski?= Date: Thu, 25 Apr 2024 16:14:17 +0200 Subject: [PATCH] Prefetch paint struct bounds (#21894) With restored `benchspritesort` this yields around 5% better results. --- src/openrct2/libopenrct2.vcxproj | 1 + src/openrct2/paint/Paint.cpp | 2 ++ src/openrct2/util/Prefetch.h | 34 ++++++++++++++++++++++++++++++++ 3 files changed, 37 insertions(+) create mode 100644 src/openrct2/util/Prefetch.h diff --git a/src/openrct2/libopenrct2.vcxproj b/src/openrct2/libopenrct2.vcxproj index 3dda36af25..be99f4ae3e 100644 --- a/src/openrct2/libopenrct2.vcxproj +++ b/src/openrct2/libopenrct2.vcxproj @@ -567,6 +567,7 @@ + diff --git a/src/openrct2/paint/Paint.cpp b/src/openrct2/paint/Paint.cpp index 53f82b608f..76ddd1c0ee 100644 --- a/src/openrct2/paint/Paint.cpp +++ b/src/openrct2/paint/Paint.cpp @@ -20,6 +20,7 @@ #include "../paint/Painter.h" #include "../profiling/Profiling.h" #include "../util/Math.hpp" +#include "../util/Prefetch.h" #include "Boundbox.h" #include "Paint.Entity.h" #include "tile_element/Paint.TileElement.h" @@ -391,6 +392,7 @@ template static void PaintStructsSortQuadrant(PaintStruct* pa auto* ps = child; child = child->NextQuadrantEntry; + PREFETCH(&child->Bounds); if (child == nullptr || child->SortFlags & PaintSortFlags::OutsideQuadrant) { break; diff --git a/src/openrct2/util/Prefetch.h b/src/openrct2/util/Prefetch.h new file mode 100644 index 0000000000..8fbeb40556 --- /dev/null +++ b/src/openrct2/util/Prefetch.h @@ -0,0 +1,34 @@ +/***************************************************************************** + * Copyright (c) 2014-2024 OpenRCT2 developers + * + * For a complete list of all authors, please refer to contributors.md + * Interested in contributing? Visit https://github.com/OpenRCT2/OpenRCT2 + * + * OpenRCT2 is licensed under the GNU General Public License version 3. 
+ *****************************************************************************/
+
+#if defined(__amd64__) || defined(_M_AMD64) || defined(__i386__) || defined(_M_IX86)
+// Don't bother checking for CPUID, prefetch is available since Pentium 4
+#    include <xmmintrin.h>
+// This cannot be expressed as a `constexpr` function, exclude it from clang-tidy check
+// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
+#    define PREFETCH(x) _mm_prefetch(reinterpret_cast<const char*>(x), _MM_HINT_T0)
+
+#elif defined(_MSC_VER) && defined(_M_ARM64)
+// ARM64 prefetch is available since ARMv8.
+// MSVC's help (https://learn.microsoft.com/en-us/cpp/intrinsics/arm64-intrinsics?view=msvc-170)
+// is incorrect and points to a different header (arm64_neon.h), which doesn't contain prefetch.
+// Correct header info taken from
+// https://github.com/microsoft/wdkmetadata/blob/1ac0dd0719f19196334de12cf2a6dec20316d440/generation/WDK/IdlHeaders/km/crt/intrin.h#L2
+// and
+// https://github.com/microsoft/wdkmetadata/blob/1ac0dd0719f19196334de12cf2a6dec20316d440/generation/WDK/IdlHeaders/km/crt/intrin.h#L411
+#    include <intrin.h>
+#    define PREFETCH(x) __prefetch(x)
+
+#elif defined(__GNUC__)
+// Let the compiler handle prefetch instruction
+#    define PREFETCH(x) __builtin_prefetch(x)
+
+#else // No known prefetch intrinsic: PREFETCH(x) expands to nothing and `x` is not evaluated
+#    define PREFETCH(x)
+#endif