Fix #11402: Make string filter locale-aware.

This commit is contained in:
Michael Lutz 2023-11-03 20:43:18 +01:00
parent c294eaacc1
commit 86e28e79fb
9 changed files with 173 additions and 5 deletions

View File

@ -336,6 +336,31 @@ int MacOSStringCompare(std::string_view s1, std::string_view s2)
return (int)CFStringCompareWithOptionsAndLocale(cf1.get(), cf2.get(), CFRangeMake(0, CFStringGetLength(cf1.get())), flags, _osx_locale.get()) + 2;
}
/**
* Search if a string is contained in another string using the current locale.
*
* @param str String to search in.
* @param value String to search for.
* @param case_insensitive Search case-insensitive.
* @return 1 if value was found, 0 if it was not found, or -1 if not supported by the OS.
*/
int MacOSStringContains(const std::string_view str, const std::string_view value, bool case_insensitive)
{
static bool supported = MacOSVersionIsAtLeast(10, 5, 0);
if (!supported) return -1;
CFStringCompareFlags flags = kCFCompareLocalized | kCFCompareWidthInsensitive;
if (case_insensitive) flags |= kCFCompareCaseInsensitive;
CFAutoRelease<CFStringRef> cf_str(CFStringCreateWithBytes(kCFAllocatorDefault, (const UInt8 *)str.data(), str.size(), kCFStringEncodingUTF8, false));
CFAutoRelease<CFStringRef> cf_value(CFStringCreateWithBytes(kCFAllocatorDefault, (const UInt8 *)value.data(), value.size(), kCFStringEncodingUTF8, false));
/* If any CFString could not be created (e.g., due to UTF8 invalid chars), return OS unsupported functionality */
if (cf_str == nullptr || cf_value == nullptr) return -1;
return CFStringFindWithOptionsAndLocale(cf_str.get(), cf_value.get(), CFRangeMake(0, CFStringGetLength(cf_str.get())), flags, _osx_locale.get(), nullptr) ? 1 : 0;
}
/* virtual */ void OSXStringIterator::SetString(const char *s)
{

View File

@ -84,6 +84,7 @@ public:
void MacOSResetScriptCache(FontSize size);
void MacOSSetCurrentLocaleName(const char *iso_code);
int MacOSStringCompare(std::string_view s1, std::string_view s2);
int MacOSStringContains(const std::string_view str, const std::string_view value, bool case_insensitive);
void MacOSRegisterExternalFont(const char *file_path);

View File

@ -19,6 +19,7 @@
#define NO_SHOBJIDL_SORTDIRECTION // Avoid multiple definition of SORT_ASCENDING
#include <shlobj.h> /* SHGetFolderPath */
#include <shellapi.h>
#include <WinNls.h>
#include "win32.h"
#include "../../fios.h"
#include "../../core/alloc_func.hpp"
@ -601,6 +602,44 @@ int OTTDStringCompare(std::string_view s1, std::string_view s2)
return CompareString(MAKELCID(_current_language->winlangid, SORT_DEFAULT), NORM_IGNORECASE, s1_buf, -1, s2_buf, -1);
}
/**
* Search if a string is contained in another string using the current locale.
*
* @param str String to search in.
* @param value String to search for.
* @param case_insensitive Search case-insensitive.
* @return 1 if value was found, 0 if it was not found, or -1 if not supported by the OS.
*/
int Win32StringContains(const std::string_view str, const std::string_view value, bool case_insensitive)
{
typedef int (WINAPI *PFNFINDNLSSTRINGEX)(LPCWSTR, DWORD, LPCWSTR, int, LPCWSTR, int, LPINT, LPNLSVERSIONINFO, LPVOID, LPARAM);
static PFNFINDNLSSTRINGEX _FindNLSStringEx = nullptr;
static bool first_time = true;
if (first_time) {
static DllLoader _kernel32(L"Kernel32.dll");
_FindNLSStringEx = _kernel32.GetProcAddress("FindNLSStringEx");
first_time = false;
}
if (_FindNLSStringEx != nullptr) {
int len_str = MultiByteToWideChar(CP_UTF8, 0, str.data(), (int)str.size(), nullptr, 0);
int len_value = MultiByteToWideChar(CP_UTF8, 0, value.data(), (int)value.size(), nullptr, 0);
if (len_str != 0 && len_value != 0) {
std::wstring str_str(len_str, L'\0'); // len includes terminating null
std::wstring str_value(len_value, L'\0');
MultiByteToWideChar(CP_UTF8, 0, str.data(), (int)str.size(), str_str.data(), len_str);
MultiByteToWideChar(CP_UTF8, 0, value.data(), (int)value.size(), str_value.data(), len_value);
return _FindNLSStringEx(_cur_iso_locale, FIND_FROMSTART | (case_insensitive ? LINGUISTIC_IGNORECASE : 0), str_str.data(), -1, str_value.data(), -1, nullptr, nullptr, nullptr, 0) >= 0 ? 1 : 0;
}
}
return -1; // Failure indication.
}
#ifdef _MSC_VER
/* Based on code from MSDN: https://msdn.microsoft.com/en-us/library/xcb2z8hs.aspx */
const DWORD MS_VC_EXCEPTION = 0x406D1388;

View File

@ -60,5 +60,6 @@ wchar_t *convert_to_fs(const std::string_view name, wchar_t *utf16_buf, size_t b
void Win32SetCurrentLocaleName(const char *iso_code);
int OTTDStringCompare(std::string_view s1, std::string_view s2);
int Win32StringContains(const std::string_view str, const std::string_view value, bool case_insensitive);
#endif /* WIN32_H */

View File

@ -1139,7 +1139,7 @@ CompanyID ScriptDebugWindow::script_debug_company = INVALID_COMPANY;
std::string ScriptDebugWindow::break_string;
bool ScriptDebugWindow::break_check_enabled = true;
bool ScriptDebugWindow::case_sensitive_break_check = false;
StringFilter ScriptDebugWindow::break_string_filter(&ScriptDebugWindow::case_sensitive_break_check);
StringFilter ScriptDebugWindow::break_string_filter(&ScriptDebugWindow::case_sensitive_break_check, false);
/** Make a number of rows with buttons for each company for the Script debug window. */
NWidgetBase *MakeCompanyButtonRowsScriptDebug(int *biggest_index)

View File

@ -635,6 +635,98 @@ int StrNaturalCompare(std::string_view s1, std::string_view s2, bool ignore_garb
return StrCompareIgnoreCase(s1, s2);
}
#ifdef WITH_ICU_I18N
#include <unicode/stsearch.h>
/**
* Search if a string is contained in another string using the current locale.
*
* @param str String to search in.
* @param value String to search for.
* @param case_insensitive Search case-insensitive.
* @return 1 if value was found, 0 if it was not found, or -1 if not supported by the OS.
*/
static int ICUStringContains(const std::string_view str, const std::string_view value, bool case_insensitive)
{
if (_current_collator) {
std::unique_ptr<icu::RuleBasedCollator> coll(dynamic_cast<icu::RuleBasedCollator *>(_current_collator->clone()));
if (coll) {
UErrorCode status = U_ZERO_ERROR;
coll->setStrength(case_insensitive ? icu::Collator::SECONDARY : icu::Collator::TERTIARY);
coll->setAttribute(UCOL_NUMERIC_COLLATION, UCOL_OFF, status);
auto u_str = icu::UnicodeString::fromUTF8(icu::StringPiece(str.data(), str.size()));
auto u_value = icu::UnicodeString::fromUTF8(icu::StringPiece(value.data(), value.size()));
icu::StringSearch u_searcher(u_value, u_str, coll.get(), nullptr, status);
if (U_SUCCESS(status)) {
auto pos = u_searcher.first(status);
if (U_SUCCESS(status)) return pos != USEARCH_DONE ? 1 : 0;
}
}
}
return -1;
}
#endif /* WITH_ICU_I18N */
/**
 * Case-sensitive, locale-aware test whether one string occurs inside another.
 *
 * Each backend that is compiled in is asked in turn (ICU, then Win32 or macOS);
 * a negative backend result means "cannot answer" and moves on to the next one.
 * When no backend answers, a plain byte-wise substring search is used.
 *
 * @param str The string to search in.
 * @param value The string to search for.
 * @return True if a match was found.
 */
[[nodiscard]] bool StrNaturalContains(const std::string_view str, const std::string_view value)
{
#ifdef WITH_ICU_I18N
	if (const int icu_result = ICUStringContains(str, value, false); icu_result >= 0) return icu_result != 0;
#endif /* WITH_ICU_I18N */

#if defined(_WIN32) && !defined(STRGEN) && !defined(SETTINGSGEN)
	if (const int win_result = Win32StringContains(str, value, false); win_result >= 0) return win_result != 0;
#endif

#if defined(WITH_COCOA) && !defined(STRGEN) && !defined(SETTINGSGEN)
	if (const int mac_result = MacOSStringContains(str, value, false); mac_result >= 0) return mac_result != 0;
#endif

	/* No locale-aware answer available. */
	const bool found = str.find(value) != std::string_view::npos;
	return found;
}
/**
* Checks if a string is contained in another string with a locale-aware comparison that is case insensitive.
*
* @param str The string to search in.
* @param value The string to search for.
* @return True if a match was found.
*/
[[nodiscard]] bool StrNaturalContainsIgnoreCase(const std::string_view str, const std::string_view value)
{
#ifdef WITH_ICU_I18N
int res_u = ICUStringContains(str, value, true);
if (res_u >= 0) return res_u > 0;
#endif /* WITH_ICU_I18N */
#if defined(_WIN32) && !defined(STRGEN) && !defined(SETTINGSGEN)
int res = Win32StringContains(str, value, true);
if (res >= 0) return res > 0;
#endif
#if defined(WITH_COCOA) && !defined(STRGEN) && !defined(SETTINGSGEN)
int res = MacOSStringContains(str, value, true);
if (res >= 0) return res > 0;
#endif
CaseInsensitiveStringView ci_str{ str.data(), str.size() };
CaseInsensitiveStringView ci_value{ value.data(), value.size() };
return ci_str.find(ci_value) != CaseInsensitiveStringView::npos;
}
#ifdef WITH_UNISCRIBE
/* static */ std::unique_ptr<StringIterator> StringIterator::Create()

View File

@ -39,6 +39,8 @@ void StrTrimInPlace(std::string &str);
[[nodiscard]] int StrCompareIgnoreCase(const std::string_view str1, const std::string_view str2);
[[nodiscard]] bool StrEqualsIgnoreCase(const std::string_view str1, const std::string_view str2);
[[nodiscard]] int StrNaturalCompare(std::string_view s1, std::string_view s2, bool ignore_garbage_at_front = false);
[[nodiscard]] bool StrNaturalContains(const std::string_view str, const std::string_view value);
[[nodiscard]] bool StrNaturalContainsIgnoreCase(const std::string_view str, const std::string_view value);
/** Case insensitive comparator for strings, for example for use in std::map. */
struct CaseInsensitiveComparator {

View File

@ -118,9 +118,16 @@ void StringFilter::AddLine(const char *str)
bool match_case = this->case_sensitive != nullptr && *this->case_sensitive;
for (WordState &ws : this->word_index) {
if (!ws.match) {
if ((match_case ? strstr(str, ws.start) : strcasestr(str, ws.start)) != nullptr) {
ws.match = true;
this->word_matches++;
if (this->locale_aware) {
if (match_case ? StrNaturalContains(str, ws.start) : StrNaturalContainsIgnoreCase(str, ws.start)) {
ws.match = true;
this->word_matches++;
}
} else {
if ((match_case ? strstr(str, ws.start) : strcasestr(str, ws.start)) != nullptr) {
ws.match = true;
this->word_matches++;
}
}
}
}

View File

@ -40,13 +40,14 @@ private:
uint word_matches; ///< Summary of filter state: Number of words matched.
const bool *case_sensitive; ///< Match case-sensitively (usually a static variable).
bool locale_aware; ///< Match words using the current locale.
public:
/**
* Constructor for filter.
* @param case_sensitive Pointer to a (usually static) variable controlling the case-sensitivity. nullptr means always case-insensitive.
*/
StringFilter(const bool *case_sensitive = nullptr) : filter_buffer(nullptr), word_matches(0), case_sensitive(case_sensitive) {}
StringFilter(const bool *case_sensitive = nullptr, bool locale_aware = true) : filter_buffer(nullptr), word_matches(0), case_sensitive(case_sensitive), locale_aware(locale_aware) {}
~StringFilter() { free(this->filter_buffer); }
void SetFilterTerm(const char *str);