diff --git a/src/os/macosx/string_osx.cpp b/src/os/macosx/string_osx.cpp
index 93c50722bd..4698f601be 100644
--- a/src/os/macosx/string_osx.cpp
+++ b/src/os/macosx/string_osx.cpp
@@ -11,6 +11,7 @@
 
 #include "../../stdafx.h"
 #include "string_osx.h"
+#include "../../string_func.h"
 #include "macos.h"
 
 #include <CoreFoundation/CoreFoundation.h>
@@ -56,6 +57,162 @@ int MacOSStringCompare(const char *s1, const char *s2)
 	return (int)res + 2;
 }
 
+
+/**
+ * Set a new iteration string and rebuild the break tables.
+ *
+ * The input is converted to UTF-16 because the CoreFoundation string APIs work
+ * on UTF-16 code units. A table mapping each UTF-16 index back to its byte offset
+ * in the UTF-8 input is built at the same time, so callers only see UTF-8 offsets.
+ * @param s NUL-terminated UTF-8 string to iterate over.
+ */
+/* virtual */ void OSXStringIterator::SetString(const char *s)
+{
+	const char *string_base = s;
+
+	this->utf16_to_utf8.clear();
+	this->str_info.clear();
+	this->cur_pos = 0;
+
+	/* CoreText operates on UTF-16, thus we have to convert the input string.
+	 * To be able to return proper offsets, we have to create a mapping at the same time. */
+	std::vector<UniChar> utf16_str; ///< UTF-16 copy of the string.
+	while (*s != '\0') {
+		size_t idx = s - string_base;
+
+		WChar c = Utf8Consume(&s);
+		if (c < 0x10000) {
+			utf16_str.push_back((UniChar)c);
+		} else {
+			/* Make a surrogate pair; both halves map back to the same UTF-8 offset. */
+			utf16_str.push_back((UniChar)(0xD800 + ((c - 0x10000) >> 10)));
+			utf16_str.push_back((UniChar)(0xDC00 + ((c - 0x10000) & 0x3FF)));
+			this->utf16_to_utf8.push_back(idx);
+		}
+		this->utf16_to_utf8.push_back(idx);
+	}
+	/* Extra entry so that the end-of-string position has a mapping as well. */
+	this->utf16_to_utf8.push_back(s - string_base);
+
+	/* Query CoreText for word and cluster break information. */
+	this->str_info.resize(this->utf16_to_utf8.size());
+
+	if (!utf16_str.empty()) {
+		/* The CFString only wraps our buffer (kCFAllocatorNull), it must not outlive utf16_str. */
+		CFStringRef str = CFStringCreateWithCharactersNoCopy(kCFAllocatorDefault, &utf16_str[0], utf16_str.size(), kCFAllocatorNull);
+
+		/* Creation may fail; CFRelease() must never be passed NULL. */
+		if (str != NULL) {
+			const CFIndex length = CFStringGetLength(str);
+
+			/* Get cluster breaks. */
+			for (CFIndex i = 0; i < length; ) {
+				CFRange r = CFStringGetRangeOfComposedCharactersAtIndex(str, i);
+				this->str_info[r.location].char_stop = true;
+
+				i += r.length;
+			}
+
+			/* Get word breaks. */
+			CFStringTokenizerRef tokenizer = CFStringTokenizerCreate(kCFAllocatorDefault, str, CFRangeMake(0, length), kCFStringTokenizerUnitWordBoundary, _osx_locale);
+
+			if (tokenizer != NULL) {
+				CFStringTokenizerTokenType tokenType = kCFStringTokenizerTokenNone;
+				while ((tokenType = CFStringTokenizerAdvanceToNextToken(tokenizer)) != kCFStringTokenizerTokenNone) {
+					/* Skip tokens that are white-space or punctuation tokens. */
+					if ((tokenType & kCFStringTokenizerTokenHasNonLettersMask) != kCFStringTokenizerTokenHasNonLettersMask) {
+						CFRange r = CFStringTokenizerGetCurrentTokenRange(tokenizer);
+						this->str_info[r.location].word_stop = true;
+					}
+				}
+
+				CFRelease(tokenizer);
+			}
+
+			CFRelease(str);
+		}
+	}
+
+	/* End-of-string is always a valid stopping point. */
+	this->str_info.back().char_stop = true;
+	this->str_info.back().word_stop = true;
+}
+
+/**
+ * Move the iterator to the given byte offset of the UTF-8 string.
+ *
+ * An offset without an entry in the mapping table is treated as offset 0, and
+ * an offset inside a grapheme cluster is moved back to the start of that cluster.
+ * @param pos Byte offset into the string passed to SetString().
+ * @return Byte offset of the position that was actually selected.
+ */
+/* virtual */ size_t OSXStringIterator::SetCurPosition(size_t pos)
+{
+	/* Convert incoming position to an UTF-16 string index. */
+	size_t utf16_pos = 0;
+	for (size_t i = 0; i < this->utf16_to_utf8.size(); i++) {
+		if (this->utf16_to_utf8[i] == pos) {
+			utf16_pos = i;
+			break;
+		}
+	}
+
+	/* Sanitize in case we get a position inside a grapheme cluster. */
+	while (utf16_pos > 0 && !this->str_info[utf16_pos].char_stop) utf16_pos--;
+	this->cur_pos = utf16_pos;
+
+	return this->utf16_to_utf8[this->cur_pos];
+}
+
+/**
+ * Advance to the next character or word stop.
+ * @param what Whether to stop at grapheme clusters (ITER_CHARACTER) or words (ITER_WORD).
+ * @return Byte offset of the new position, or END if there is no further stop.
+ */
+/* virtual */ size_t OSXStringIterator::Next(IterType what)
+{
+	assert(this->cur_pos <= this->utf16_to_utf8.size());
+	assert(what == StringIterator::ITER_CHARACTER || what == StringIterator::ITER_WORD);
+
+	if (this->cur_pos == this->utf16_to_utf8.size()) return END;
+
+	do {
+		this->cur_pos++;
+	} while (this->cur_pos < this->utf16_to_utf8.size() && (what == ITER_WORD ? !this->str_info[this->cur_pos].word_stop : !this->str_info[this->cur_pos].char_stop));
+
+	return this->cur_pos == this->utf16_to_utf8.size() ? END : this->utf16_to_utf8[this->cur_pos];
+}
+
+/**
+ * Move back to the previous character or word stop.
+ * @param what Whether to stop at grapheme clusters (ITER_CHARACTER) or words (ITER_WORD).
+ * @return Byte offset of the new position, or END if already at the start of the string.
+ */
+/* virtual */ size_t OSXStringIterator::Prev(IterType what)
+{
+	assert(this->cur_pos <= this->utf16_to_utf8.size());
+	assert(what == StringIterator::ITER_CHARACTER || what == StringIterator::ITER_WORD);
+
+	if (this->cur_pos == 0) return END;
+
+	do {
+		this->cur_pos--;
+	} while (this->cur_pos > 0 && (what == ITER_WORD ? !this->str_info[this->cur_pos].word_stop : !this->str_info[this->cur_pos].char_stop));
+
+	return this->utf16_to_utf8[this->cur_pos];
+}
+
+/**
+ * Create a CoreText backed string iterator.
+ * @return New iterator instance, or NULL when the running system is older than OS X 10.5.
+ */
+/* static */ StringIterator *OSXStringIterator::Create()
+{
+	if (!MacOSVersionIsAtLeast(10, 5, 0)) return NULL;
+
+	return new OSXStringIterator();
+}
+
 #else
 void MacOSSetCurrentLocaleName(const char *iso_code) {}
 
@@ -63,4 +220,10 @@ int MacOSStringCompare(const char *s1, const char *s2)
 {
 	return 0;
 }
+
+/** Stub for builds against an SDK older than 10.5: no native iterator is available. */
+/* static */ StringIterator *OSXStringIterator::Create()
+{
+	return NULL;
+}
 #endif /* (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_5) */
diff --git a/src/os/macosx/string_osx.h b/src/os/macosx/string_osx.h
index 90bac48d23..ea7904f549 100644
--- a/src/os/macosx/string_osx.h
+++ b/src/os/macosx/string_osx.h
@@ -12,6 +12,40 @@
 #ifndef STRING_OSX_H
 #define STRING_OSX_H
 
+#include "../../gfx_layout.h"
+#include "../../string_base.h"
+#include <vector>
+
+/**
+ * String iterator using CoreText as a backend.
+ *
+ * All positions handed in and out are byte offsets into the UTF-8 string given
+ * to SetString(); internally the iterator works on UTF-16 code unit indices.
+ */
+class OSXStringIterator : public StringIterator {
+	/** Break info for a character. */
+	struct CharInfo {
+		bool word_stop : 1; ///< Position is suitable as a word break.
+		bool char_stop : 1; ///< Position is the start of a grapheme cluster, i.e. a "character".
+	};
+
+	std::vector<CharInfo> str_info;    ///< Break information for each UTF-16 code unit, plus one entry for end-of-string.
+	std::vector<size_t> utf16_to_utf8; ///< Mapping from UTF-16 code unit position to index in the UTF-8 source string.
+
+	size_t cur_pos; ///< Current iteration position, as index into the two tables above.
+
+public:
+	/** Start without a string; Next() and Prev() return END until SetString() is called. */
+	OSXStringIterator() : cur_pos(0) {}
+
+	virtual void SetString(const char *s);
+	virtual size_t SetCurPosition(size_t pos);
+	virtual size_t Next(IterType what);
+	virtual size_t Prev(IterType what);
+
+	static StringIterator *Create();
+};
+
 void MacOSSetCurrentLocaleName(const char *iso_code);
 int MacOSStringCompare(const char *s1, const char *s2);
 
diff --git a/src/string.cpp b/src/string.cpp
index 845225d6f5..96e1d82754 100644
--- a/src/string.cpp
+++ b/src/string.cpp
@@ -874,9 +874,23 @@ public:
 	}
 };
 
+#if defined(WITH_COCOA) && !defined(STRGEN) && !defined(SETTINGSGEN)
+/**
+ * Create a string iterator, preferring the native OS X implementation.
+ * @return New iterator; the default implementation is used when no native one is available.
+ */
+/* static */ StringIterator *StringIterator::Create()
+{
+	StringIterator *i = OSXStringIterator::Create();
+	if (i != NULL) return i;
+
+	return new DefaultStringIterator();
+}
+#else
 /* static */ StringIterator *StringIterator::Create()
 {
 	return new DefaultStringIterator();
 }
+#endif /* defined(WITH_COCOA) && !defined(STRGEN) && !defined(SETTINGSGEN) */
 
 #endif