From 86e28e79fbc59e8a61106b76e026aba324374684 Mon Sep 17 00:00:00 2001 From: Michael Lutz Date: Fri, 3 Nov 2023 20:43:18 +0100 Subject: [PATCH] Fix #11402: Make string filter locale-aware. --- src/os/macosx/string_osx.cpp | 25 ++++++++++ src/os/macosx/string_osx.h | 1 + src/os/windows/win32.cpp | 39 +++++++++++++++ src/os/windows/win32.h | 1 + src/script/script_gui.cpp | 2 +- src/string.cpp | 92 ++++++++++++++++++++++++++++++++++++ src/string_func.h | 2 + src/stringfilter.cpp | 13 +++-- src/stringfilter_type.h | 3 +- 9 files changed, 173 insertions(+), 5 deletions(-) diff --git a/src/os/macosx/string_osx.cpp b/src/os/macosx/string_osx.cpp index 66e17e18f4..cd281257b0 100644 --- a/src/os/macosx/string_osx.cpp +++ b/src/os/macosx/string_osx.cpp @@ -336,6 +336,31 @@ int MacOSStringCompare(std::string_view s1, std::string_view s2) return (int)CFStringCompareWithOptionsAndLocale(cf1.get(), cf2.get(), CFRangeMake(0, CFStringGetLength(cf1.get())), flags, _osx_locale.get()) + 2; } +/** + * Search if a string is contained in another string using the current locale. + * + * @param str String to search in. + * @param value String to search for. + * @param case_insensitive Search case-insensitive. + * @return 1 if value was found, 0 if it was not found, or -1 if not supported by the OS or if either string could not be converted to a CFString. 
+ */ +int MacOSStringContains(const std::string_view str, const std::string_view value, bool case_insensitive) +{ + static bool supported = MacOSVersionIsAtLeast(10, 5, 0); + if (!supported) return -1; + + CFStringCompareFlags flags = kCFCompareLocalized | kCFCompareWidthInsensitive; + if (case_insensitive) flags |= kCFCompareCaseInsensitive; + + CFAutoRelease cf_str(CFStringCreateWithBytes(kCFAllocatorDefault, (const UInt8 *)str.data(), str.size(), kCFStringEncodingUTF8, false)); + CFAutoRelease cf_value(CFStringCreateWithBytes(kCFAllocatorDefault, (const UInt8 *)value.data(), value.size(), kCFStringEncodingUTF8, false)); + + /* If any CFString could not be created (e.g., due to invalid UTF-8 input), return -1, the same result as for unsupported OS functionality. */ + if (cf_str == nullptr || cf_value == nullptr) return -1; + + return CFStringFindWithOptionsAndLocale(cf_str.get(), cf_value.get(), CFRangeMake(0, CFStringGetLength(cf_str.get())), flags, _osx_locale.get(), nullptr) ? 1 : 0; +} + /* virtual */ void OSXStringIterator::SetString(const char *s) { diff --git a/src/os/macosx/string_osx.h b/src/os/macosx/string_osx.h index 8fd9d988dc..8d475882e2 100644 --- a/src/os/macosx/string_osx.h +++ b/src/os/macosx/string_osx.h @@ -84,6 +84,7 @@ public: void MacOSResetScriptCache(FontSize size); void MacOSSetCurrentLocaleName(const char *iso_code); int MacOSStringCompare(std::string_view s1, std::string_view s2); +int MacOSStringContains(const std::string_view str, const std::string_view value, bool case_insensitive); void MacOSRegisterExternalFont(const char *file_path); diff --git a/src/os/windows/win32.cpp b/src/os/windows/win32.cpp index 3611dfe1af..0b732ca431 100644 --- a/src/os/windows/win32.cpp +++ b/src/os/windows/win32.cpp @@ -19,6 +19,7 @@ #define NO_SHOBJIDL_SORTDIRECTION // Avoid multiple definition of SORT_ASCENDING #include /* SHGetFolderPath */ #include +#include #include "win32.h" #include "../../fios.h" #include "../../core/alloc_func.hpp" @@ -601,6 +602,44 @@ int 
OTTDStringCompare(std::string_view s1, std::string_view s2) return CompareString(MAKELCID(_current_language->winlangid, SORT_DEFAULT), NORM_IGNORECASE, s1_buf, -1, s2_buf, -1); } +/** + * Search if a string is contained in another string using the current locale. + * + * @param str String to search in. + * @param value String to search for. + * @param case_insensitive Search case-insensitive. + * @return 1 if value was found, 0 if it was not found, or -1 if FindNLSStringEx is not available or either string has a converted length of zero. + */ +int Win32StringContains(const std::string_view str, const std::string_view value, bool case_insensitive) +{ + typedef int (WINAPI *PFNFINDNLSSTRINGEX)(LPCWSTR, DWORD, LPCWSTR, int, LPCWSTR, int, LPINT, LPNLSVERSIONINFO, LPVOID, LPARAM); + static PFNFINDNLSSTRINGEX _FindNLSStringEx = nullptr; + static bool first_time = true; + + if (first_time) { + static DllLoader _kernel32(L"Kernel32.dll"); + _FindNLSStringEx = _kernel32.GetProcAddress("FindNLSStringEx"); + first_time = false; + } + + if (_FindNLSStringEx != nullptr) { + int len_str = MultiByteToWideChar(CP_UTF8, 0, str.data(), (int)str.size(), nullptr, 0); + int len_value = MultiByteToWideChar(CP_UTF8, 0, value.data(), (int)value.size(), nullptr, 0); + + if (len_str != 0 && len_value != 0) { + std::wstring str_str(len_str, L'\0'); // length excludes a terminating null (explicit source size is passed); std::wstring keeps its own terminator for the -1 length arguments below + std::wstring str_value(len_value, L'\0'); + + MultiByteToWideChar(CP_UTF8, 0, str.data(), (int)str.size(), str_str.data(), len_str); + MultiByteToWideChar(CP_UTF8, 0, value.data(), (int)value.size(), str_value.data(), len_value); + + return _FindNLSStringEx(_cur_iso_locale, FIND_FROMSTART | (case_insensitive ? LINGUISTIC_IGNORECASE : 0), str_str.data(), -1, str_value.data(), -1, nullptr, nullptr, nullptr, 0) >= 0 ? 1 : 0; + } + } + + return -1; // Failure indication. 
+} + #ifdef _MSC_VER /* Based on code from MSDN: https://msdn.microsoft.com/en-us/library/xcb2z8hs.aspx */ const DWORD MS_VC_EXCEPTION = 0x406D1388; diff --git a/src/os/windows/win32.h b/src/os/windows/win32.h index ab19a7d1ae..561d471bac 100644 --- a/src/os/windows/win32.h +++ b/src/os/windows/win32.h @@ -60,5 +60,6 @@ wchar_t *convert_to_fs(const std::string_view name, wchar_t *utf16_buf, size_t b void Win32SetCurrentLocaleName(const char *iso_code); int OTTDStringCompare(std::string_view s1, std::string_view s2); +int Win32StringContains(const std::string_view str, const std::string_view value, bool case_insensitive); #endif /* WIN32_H */ diff --git a/src/script/script_gui.cpp b/src/script/script_gui.cpp index f393917c4e..39cf1a00a1 100644 --- a/src/script/script_gui.cpp +++ b/src/script/script_gui.cpp @@ -1139,7 +1139,7 @@ CompanyID ScriptDebugWindow::script_debug_company = INVALID_COMPANY; std::string ScriptDebugWindow::break_string; bool ScriptDebugWindow::break_check_enabled = true; bool ScriptDebugWindow::case_sensitive_break_check = false; -StringFilter ScriptDebugWindow::break_string_filter(&ScriptDebugWindow::case_sensitive_break_check); +StringFilter ScriptDebugWindow::break_string_filter(&ScriptDebugWindow::case_sensitive_break_check, false); /** Make a number of rows with buttons for each company for the Script debug window. */ NWidgetBase *MakeCompanyButtonRowsScriptDebug(int *biggest_index) diff --git a/src/string.cpp b/src/string.cpp index 7e54b63912..020fd2fd47 100644 --- a/src/string.cpp +++ b/src/string.cpp @@ -635,6 +635,98 @@ int StrNaturalCompare(std::string_view s1, std::string_view s2, bool ignore_garb return StrCompareIgnoreCase(s1, s2); } +#ifdef WITH_ICU_I18N + +#include + +/** + * Search if a string is contained in another string using the current locale. + * + * @param str String to search in. + * @param value String to search for. + * @param case_insensitive Search case-insensitive. 
+ * @return 1 if value was found, 0 if it was not found, or -1 if no usable collator is available or ICU reported an error. + */ +static int ICUStringContains(const std::string_view str, const std::string_view value, bool case_insensitive) +{ + if (_current_collator) { + std::unique_ptr coll(dynamic_cast(_current_collator->clone())); + if (coll) { + UErrorCode status = U_ZERO_ERROR; + coll->setStrength(case_insensitive ? icu::Collator::SECONDARY : icu::Collator::TERTIARY); + coll->setAttribute(UCOL_NUMERIC_COLLATION, UCOL_OFF, status); + + auto u_str = icu::UnicodeString::fromUTF8(icu::StringPiece(str.data(), str.size())); + auto u_value = icu::UnicodeString::fromUTF8(icu::StringPiece(value.data(), value.size())); + icu::StringSearch u_searcher(u_value, u_str, coll.get(), nullptr, status); + if (U_SUCCESS(status)) { + auto pos = u_searcher.first(status); + if (U_SUCCESS(status)) return pos != USEARCH_DONE ? 1 : 0; + } + } + } + + return -1; +} +#endif /* WITH_ICU_I18N */ + +/** + * Checks if a string is contained in another string with a locale-aware comparison that is case sensitive, falling back to a plain std::string_view search if no locale-aware backend gives a result. + * + * @param str The string to search in. + * @param value The string to search for. + * @return True if a match was found. 
+ */ +[[nodiscard]] bool StrNaturalContains(const std::string_view str, const std::string_view value) +{ +#ifdef WITH_ICU_I18N + int res_u = ICUStringContains(str, value, false); + if (res_u >= 0) return res_u > 0; +#endif /* WITH_ICU_I18N */ + +#if defined(_WIN32) && !defined(STRGEN) && !defined(SETTINGSGEN) + int res = Win32StringContains(str, value, false); + if (res >= 0) return res > 0; +#endif + +#if defined(WITH_COCOA) && !defined(STRGEN) && !defined(SETTINGSGEN) + int res = MacOSStringContains(str, value, false); + if (res >= 0) return res > 0; +#endif + + return str.find(value) != std::string_view::npos; +} + +/** + * Checks if a string is contained in another string with a locale-aware comparison that is case insensitive, falling back to a CaseInsensitiveStringView search if no locale-aware backend gives a result. + * + * @param str The string to search in. 
+ * @param value The string to search for. + * @return True if a match was found. + */ +[[nodiscard]] bool StrNaturalContainsIgnoreCase(const std::string_view str, const std::string_view value) +{ +#ifdef WITH_ICU_I18N + int res_u = ICUStringContains(str, value, true); + if (res_u >= 0) return res_u > 0; +#endif /* WITH_ICU_I18N */ + +#if defined(_WIN32) && !defined(STRGEN) && !defined(SETTINGSGEN) + int res = Win32StringContains(str, value, true); + if (res >= 0) return res > 0; +#endif + +#if defined(WITH_COCOA) && !defined(STRGEN) && !defined(SETTINGSGEN) + int res = MacOSStringContains(str, value, true); + if (res >= 0) return res > 0; +#endif + + CaseInsensitiveStringView ci_str{ str.data(), str.size() }; + CaseInsensitiveStringView ci_value{ value.data(), value.size() }; + return ci_str.find(ci_value) != CaseInsensitiveStringView::npos; +} + + #ifdef WITH_UNISCRIBE /* static */ std::unique_ptr StringIterator::Create() diff --git a/src/string_func.h b/src/string_func.h index 21e24b5114..876659c43d 100644 --- a/src/string_func.h +++ b/src/string_func.h @@ -39,6 +39,8 @@ void StrTrimInPlace(std::string &str); [[nodiscard]] int StrCompareIgnoreCase(const std::string_view str1, const std::string_view str2); [[nodiscard]] bool StrEqualsIgnoreCase(const std::string_view str1, const std::string_view str2); [[nodiscard]] int StrNaturalCompare(std::string_view s1, std::string_view s2, bool ignore_garbage_at_front = false); +[[nodiscard]] bool StrNaturalContains(const std::string_view str, const std::string_view value); +[[nodiscard]] bool StrNaturalContainsIgnoreCase(const std::string_view str, const std::string_view value); /** Case insensitive comparator for strings, for example for use in std::map. 
*/ struct CaseInsensitiveComparator { diff --git a/src/stringfilter.cpp b/src/stringfilter.cpp index 87a8bd5f3f..16800c2440 100644 --- a/src/stringfilter.cpp +++ b/src/stringfilter.cpp @@ -118,9 +118,16 @@ void StringFilter::AddLine(const char *str) bool match_case = this->case_sensitive != nullptr && *this->case_sensitive; for (WordState &ws : this->word_index) { if (!ws.match) { - if ((match_case ? strstr(str, ws.start) : strcasestr(str, ws.start)) != nullptr) { - ws.match = true; - this->word_matches++; + if (this->locale_aware) { + if (match_case ? StrNaturalContains(str, ws.start) : StrNaturalContainsIgnoreCase(str, ws.start)) { + ws.match = true; + this->word_matches++; + } + } else { + if ((match_case ? strstr(str, ws.start) : strcasestr(str, ws.start)) != nullptr) { + ws.match = true; + this->word_matches++; + } } } } diff --git a/src/stringfilter_type.h b/src/stringfilter_type.h index 1d3098abad..ec7d545fd8 100644 --- a/src/stringfilter_type.h +++ b/src/stringfilter_type.h @@ -40,13 +40,14 @@ private: uint word_matches; ///< Summary of filter state: Number of words matched. const bool *case_sensitive; ///< Match case-sensitively (usually a static variable). + bool locale_aware; ///< Match words with StrNaturalContains/StrNaturalContainsIgnoreCase (current locale) instead of strstr/strcasestr. public: /** * Constructor for filter. * @param case_sensitive Pointer to a (usually static) variable controlling the case-sensitivity. nullptr means always case-insensitive. */ - StringFilter(const bool *case_sensitive = nullptr) : filter_buffer(nullptr), word_matches(0), case_sensitive(case_sensitive) {} + StringFilter(const bool *case_sensitive = nullptr, bool locale_aware = true) : filter_buffer(nullptr), word_matches(0), case_sensitive(case_sensitive), locale_aware(locale_aware) {} ~StringFilter() { free(this->filter_buffer); } void SetFilterTerm(const char *str);