From a886bd96661a602f813424021b964da5c0e240d5 Mon Sep 17 00:00:00 2001 From: frosch Date: Sun, 17 Mar 2024 00:42:38 +0100 Subject: [PATCH] Fix #12319, 3a676a5: Some SSE blitters were broken due to ODR violations (#12322) --- src/blitter/32bpp_sse_func.hpp | 27 +++++++++++++++++---------- src/blitter/32bpp_sse_type.h | 6 ++++++ 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/src/blitter/32bpp_sse_func.hpp b/src/blitter/32bpp_sse_func.hpp index e02a232648..77c2bec954 100644 --- a/src/blitter/32bpp_sse_func.hpp +++ b/src/blitter/32bpp_sse_func.hpp @@ -10,10 +10,17 @@ #ifndef BLITTER_32BPP_SSE_FUNC_HPP #define BLITTER_32BPP_SSE_FUNC_HPP +/* ATTENTION + * This file is compiled multiple times with different defines for SSE_VERSION and MARGIN_NORMAL_THRESHOLD. + * Be careful when declaring things with external linkage. + * Use internal linkage instead, i.e. "static". + */ +#define INTERNAL_LINKAGE static + #ifdef WITH_SSE GNU_TARGET(SSE_TARGET) -inline void InsertFirstUint32(const uint32_t value, __m128i &into) +INTERNAL_LINKAGE inline void InsertFirstUint32(const uint32_t value, __m128i &into) { #if (SSE_VERSION >= 4) into = _mm_insert_epi32(into, value, 0); @@ -24,7 +31,7 @@ inline void InsertFirstUint32(const uint32_t value, __m128i &into) } GNU_TARGET(SSE_TARGET) -inline void InsertSecondUint32(const uint32_t value, __m128i &into) +INTERNAL_LINKAGE inline void InsertSecondUint32(const uint32_t value, __m128i &into) { #if (SSE_VERSION >= 4) into = _mm_insert_epi32(into, value, 1); @@ -35,7 +42,7 @@ inline void InsertSecondUint32(const uint32_t value, __m128i &into) } GNU_TARGET(SSE_TARGET) -inline void LoadUint64(const uint64_t value, __m128i &into) +INTERNAL_LINKAGE inline void LoadUint64(const uint64_t value, __m128i &into) { #ifdef POINTER_IS_64BIT into = _mm_cvtsi64_si128(value); @@ -50,7 +57,7 @@ inline void LoadUint64(const uint64_t value, __m128i &into) } GNU_TARGET(SSE_TARGET) -inline __m128i PackUnsaturated(__m128i from, const __m128i &mask) +INTERNAL_LINKAGE inline __m128i PackUnsaturated(__m128i from, const __m128i &mask) { #if (SSE_VERSION == 2) from = _mm_and_si128(from, mask); // PAND, wipe high bytes to keep low bytes when packing @@ -61,7 +68,7 @@ inline __m128i PackUnsaturated(__m128i from, const __m128i &mask) } GNU_TARGET(SSE_TARGET) -inline __m128i DistributeAlpha(const __m128i from, const __m128i &mask) +INTERNAL_LINKAGE inline __m128i DistributeAlpha(const __m128i from, const __m128i &mask) { #if (SSE_VERSION == 2) __m128i alphaAB = _mm_shufflelo_epi16(from, 0x3F); // PSHUFLW, put alpha1 in front of each rgb1 @@ -73,7 +80,7 @@ inline __m128i DistributeAlpha(const __m128i from, const __m128i &mask) } GNU_TARGET(SSE_TARGET) -inline __m128i AlphaBlendTwoPixels(__m128i src, __m128i dst, const __m128i &distribution_mask, const __m128i &pack_mask, const __m128i &alpha_mask) +INTERNAL_LINKAGE inline __m128i AlphaBlendTwoPixels(__m128i src, __m128i dst, const __m128i &distribution_mask, const __m128i &pack_mask, const __m128i &alpha_mask) { __m128i srcAB = _mm_unpacklo_epi8(src, _mm_setzero_si128()); // PUNPCKLBW, expand each uint8_t into uint16 __m128i dstAB = _mm_unpacklo_epi8(dst, _mm_setzero_si128()); @@ -97,7 +104,7 @@ inline __m128i AlphaBlendTwoPixels(__m128i src, __m128i dst, const __m128i &dist * rgb = rgb * ((256/4) * 4 - (alpha/4)) / ((256/4) * 4) */ GNU_TARGET(SSE_TARGET) -inline __m128i DarkenTwoPixels(__m128i src, __m128i dst, const __m128i &distribution_mask, const __m128i &tr_nom_base) +INTERNAL_LINKAGE inline __m128i DarkenTwoPixels(__m128i src, __m128i dst, const __m128i &distribution_mask, const __m128i &tr_nom_base) { __m128i srcAB = _mm_unpacklo_epi8(src, _mm_setzero_si128()); __m128i dstAB = _mm_unpacklo_epi8(dst, _mm_setzero_si128()); @@ -111,7 +118,7 @@ inline __m128i DarkenTwoPixels(__m128i src, __m128i dst, const __m128i &distribu IGNORE_UNINITIALIZED_WARNING_START GNU_TARGET(SSE_TARGET) -static Colour ReallyAdjustBrightness(Colour colour, uint8_t brightness) +INTERNAL_LINKAGE Colour ReallyAdjustBrightness(Colour colour, uint8_t brightness) { uint64_t c16 = colour.b | (uint64_t) colour.g << 16 | (uint64_t) colour.r << 32; c16 *= brightness; @@ -145,7 +152,7 @@ IGNORE_UNINITIALIZED_WARNING_STOP /** ReallyAdjustBrightness() is not called that often. * Inlining this function implies a far jump, which has a huge latency. */ -inline Colour AdjustBrightneSSE(Colour colour, uint8_t brightness) +INTERNAL_LINKAGE inline Colour AdjustBrightneSSE(Colour colour, uint8_t brightness) { /* Shortcut for normal brightness. */ if (brightness == Blitter_32bppBase::DEFAULT_BRIGHTNESS) return colour; @@ -154,7 +161,7 @@ inline Colour AdjustBrightneSSE(Colour colour, uint8_t brightness) } GNU_TARGET(SSE_TARGET) -inline __m128i AdjustBrightnessOfTwoPixels([[maybe_unused]] __m128i from, [[maybe_unused]] uint32_t brightness) +INTERNAL_LINKAGE inline __m128i AdjustBrightnessOfTwoPixels([[maybe_unused]] __m128i from, [[maybe_unused]] uint32_t brightness) { #if (SSE_VERSION < 3) NOT_REACHED(); diff --git a/src/blitter/32bpp_sse_type.h b/src/blitter/32bpp_sse_type.h index d3a73df1c3..e8662e4949 100644 --- a/src/blitter/32bpp_sse_type.h +++ b/src/blitter/32bpp_sse_type.h @@ -10,6 +10,12 @@ #ifndef BLITTER_32BPP_SSE_TYPE_H #define BLITTER_32BPP_SSE_TYPE_H +/* ATTENTION + * This file is compiled multiple times with different defines for SSE_VERSION. + * Be careful when declaring things with external linkage. + * Use internal linkage instead, i.e. "static". + */ + #ifdef WITH_SSE #include "32bpp_simple.hpp"