Fix #9935: Use more selectivity when building SSE specific code

This commit is contained in:
glx22 2022-06-30 01:42:36 +02:00 committed by Loïc Guilloux
parent e9507e7ffa
commit bba6ad1f4e
12 changed files with 39 additions and 24 deletions

View File

@ -29,11 +29,6 @@ add_files(
viewport_sprite_sorter_sse4.cpp
CONDITION SSE_FOUND
)
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
set_compile_flags(
viewport_sprite_sorter_sse4.cpp
COMPILE_FLAGS -msse4.1)
endif()
add_files(
aircraft.h

View File

@ -19,6 +19,7 @@
/** Instantiation of the partially SSE2 32bpp with animation blitter factory. */
static FBlitter_32bppSSE2_Anim iFBlitter_32bppSSE2_Anim;
GNU_TARGET("sse2")
void Blitter_32bppSSE2_Anim::PaletteAnimate(const Palette &palette)
{
assert(!_screen_disable_anim);

View File

@ -16,6 +16,10 @@
#define SSE_VERSION 2
#endif
#ifndef SSE_TARGET
#define SSE_TARGET "sse2"
#endif
#ifndef FULL_ANIMATION
#define FULL_ANIMATION 1
#endif

View File

@ -29,6 +29,7 @@ static FBlitter_32bppSSE4_Anim iFBlitter_32bppSSE4_Anim;
*/
IGNORE_UNINITIALIZED_WARNING_START
template <BlitterMode mode, Blitter_32bppSSE2::ReadMode read_mode, Blitter_32bppSSE2::BlockType bt_last, bool translucent, bool animated>
GNU_TARGET("sse4.1")
inline void Blitter_32bppSSE4_Anim::Draw(const Blitter::BlitterParams *bp, ZoomLevel zoom)
{
const byte * const remap = bp->remap;

View File

@ -16,6 +16,10 @@
#define SSE_VERSION 4
#endif
#ifndef SSE_TARGET
#define SSE_TARGET "sse4.1"
#endif
#ifndef FULL_ANIMATION
#define FULL_ANIMATION 1
#endif

View File

@ -16,6 +16,10 @@
#define SSE_VERSION 2
#endif
#ifndef SSE_TARGET
#define SSE_TARGET "sse2"
#endif
#ifndef FULL_ANIMATION
#define FULL_ANIMATION 0
#endif

View File

@ -16,6 +16,10 @@
#define SSE_VERSION 4
#endif
#ifndef SSE_TARGET
#define SSE_TARGET "sse4.1"
#endif
#ifndef FULL_ANIMATION
#define FULL_ANIMATION 0
#endif

View File

@ -12,6 +12,7 @@
#ifdef WITH_SSE
GNU_TARGET(SSE_TARGET)
static inline void InsertFirstUint32(const uint32 value, __m128i &into)
{
#if (SSE_VERSION >= 4)
@ -22,6 +23,7 @@ static inline void InsertFirstUint32(const uint32 value, __m128i &into)
#endif
}
GNU_TARGET(SSE_TARGET)
static inline void InsertSecondUint32(const uint32 value, __m128i &into)
{
#if (SSE_VERSION >= 4)
@ -32,6 +34,7 @@ static inline void InsertSecondUint32(const uint32 value, __m128i &into)
#endif
}
GNU_TARGET(SSE_TARGET)
static inline void LoadUint64(const uint64 value, __m128i &into)
{
#ifdef POINTER_IS_64BIT
@ -46,6 +49,7 @@ static inline void LoadUint64(const uint64 value, __m128i &into)
#endif
}
GNU_TARGET(SSE_TARGET)
static inline __m128i PackUnsaturated(__m128i from, const __m128i &mask)
{
#if (SSE_VERSION == 2)
@ -56,6 +60,7 @@ static inline __m128i PackUnsaturated(__m128i from, const __m128i &mask)
#endif
}
GNU_TARGET(SSE_TARGET)
static inline __m128i DistributeAlpha(const __m128i from, const __m128i &mask)
{
#if (SSE_VERSION == 2)
@ -66,6 +71,7 @@ static inline __m128i DistributeAlpha(const __m128i from, const __m128i &mask)
#endif
}
GNU_TARGET(SSE_TARGET)
static inline __m128i AlphaBlendTwoPixels(__m128i src, __m128i dst, const __m128i &distribution_mask, const __m128i &pack_mask)
{
__m128i srcAB = _mm_unpacklo_epi8(src, _mm_setzero_si128()); // PUNPCKLBW, expand each uint8 into uint16
@ -86,6 +92,7 @@ static inline __m128i AlphaBlendTwoPixels(__m128i src, __m128i dst, const __m128
/* Darken 2 pixels.
* rgb = rgb * ((256/4) * 4 - (alpha/4)) / ((256/4) * 4)
*/
GNU_TARGET(SSE_TARGET)
static inline __m128i DarkenTwoPixels(__m128i src, __m128i dst, const __m128i &distribution_mask, const __m128i &tr_nom_base)
{
__m128i srcAB = _mm_unpacklo_epi8(src, _mm_setzero_si128());
@ -99,6 +106,7 @@ static inline __m128i DarkenTwoPixels(__m128i src, __m128i dst, const __m128i &d
}
IGNORE_UNINITIALIZED_WARNING_START
GNU_TARGET(SSE_TARGET)
static Colour ReallyAdjustBrightness(Colour colour, uint8 brightness)
{
uint64 c16 = colour.b | (uint64) colour.g << 16 | (uint64) colour.r << 32;
@ -141,6 +149,7 @@ static inline Colour AdjustBrightneSSE(Colour colour, uint8 brightness)
return ReallyAdjustBrightness(colour, brightness);
}
GNU_TARGET(SSE_TARGET)
static inline __m128i AdjustBrightnessOfTwoPixels(__m128i from, uint32 brightness)
{
#if (SSE_VERSION < 3)
@ -192,6 +201,7 @@ static inline __m128i AdjustBrightnessOfTwoPixels(__m128i from, uint32 brightnes
*/
IGNORE_UNINITIALIZED_WARNING_START
template <BlitterMode mode, Blitter_32bppSSE2::ReadMode read_mode, Blitter_32bppSSE2::BlockType bt_last, bool translucent>
GNU_TARGET(SSE_TARGET)
#if (SSE_VERSION == 2)
inline void Blitter_32bppSSE2::Draw(const Blitter::BlitterParams *bp, ZoomLevel zoom)
#elif (SSE_VERSION == 3)

View File

@ -16,6 +16,10 @@
#define SSE_VERSION 3
#endif
#ifndef SSE_TARGET
#define SSE_TARGET "ssse3"
#endif
#ifndef FULL_ANIMATION
#define FULL_ANIMATION 0
#endif

View File

@ -38,21 +38,6 @@ add_files(
CONDITION NOT OPTION_DEDICATED AND OPENGL_FOUND
)
if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU" OR CMAKE_CXX_COMPILER_ID STREQUAL "Clang" OR CMAKE_CXX_COMPILER_ID STREQUAL "AppleClang")
set_compile_flags(
32bpp_anim_sse2.cpp
32bpp_sse2.cpp
COMPILE_FLAGS -msse2)
set_compile_flags(
32bpp_ssse3.cpp
COMPILE_FLAGS -mssse3)
set_compile_flags(
32bpp_anim_sse4.cpp
32bpp_sse4.cpp
COMPILE_FLAGS -msse4.1)
endif()
add_files(
base.hpp
common.hpp

View File

@ -418,11 +418,13 @@ static_assert(SIZE_MAX >= UINT32_MAX);
#endif /* __APPLE__ */
/*
 * Compiler-specific helper macros.
 *
 * On GCC and Clang:
 *  - likely(x) / unlikely(x) wrap the condition in __builtin_expect, with an
 *    expected value of 1 and 0 respectively; !!(x) normalises the condition
 *    to 0 or 1 first.
 *  - GNU_TARGET(x) expands to the [[gnu::target(x)]] attribute, so it can be
 *    placed in front of a single function definition with a target string
 *    such as "sse2" or "sse4.1".
 *
 * On any other compiler likely(x) / unlikely(x) expand to plain (x) and
 * GNU_TARGET(x) expands to nothing.
 */
#if defined(__GNUC__) || defined(__clang__)
# define likely(x) __builtin_expect(!!(x), 1)
# define unlikely(x) __builtin_expect(!!(x), 0)
# define likely(x) __builtin_expect(!!(x), 1)
# define unlikely(x) __builtin_expect(!!(x), 0)
# define GNU_TARGET(x) [[gnu::target(x)]]
#else
# define likely(x) (x)
# define unlikely(x) (x)
# define likely(x) (x)
# define unlikely(x) (x)
# define GNU_TARGET(x)
#endif /* __GNUC__ || __clang__ */
/* For the FMT library we only want to use the headers, not link to some library. */

View File

@ -26,6 +26,7 @@
# define LOAD_128 _mm_loadu_si128
#endif
GNU_TARGET("sse4.1")
void ViewportSortParentSpritesSSE41(ParentSpriteToSortVector *psdv)
{
if (psdv->size() < 2) return;