mirror of https://github.com/OpenRCT2/OpenRCT2.git
Fix out-of-bounds access in convert_multibyte_charset
convert_multibyte_charset assumes that a 0xFF byte is always followed by two more bytes. This is not the case with BATFL.DAT, whose string at offset 0x2D6 (languageId = 0xA) ends with the bytes 0xFF, 0xC2, 0x00. In this case, 0x00 therefore appears to serve as both the string terminator and the second byte of a multibyte sequence. This commit does not change the original behaviour at all; it only prevents the code from looking for a 0x00 after the string has actually ended.
This commit is contained in:
parent
da8964efab
commit
1a521c53fb
|
@ -147,17 +147,23 @@ static utf8 * convert_multibyte_charset(const char * src, size_t srcMaxSize, int
|
|||
constexpr char CODEPOINT_DOUBLEBYTE = (char)0xFF;
|
||||
|
||||
auto sb = StringBuilder(64);
|
||||
for (const char * ch = src; (ch < src + srcMaxSize) && (*ch != 0);)
|
||||
for (const char * ch = src; (ch < src + srcMaxSize) && (*ch != '\0');)
|
||||
{
|
||||
if (*ch == CODEPOINT_DOUBLEBYTE)
|
||||
{
|
||||
ch++;
|
||||
uint8 a = *ch++;
|
||||
uint8 b = *ch++;
|
||||
wchar_t codepoint16 = (wchar_t)((a << 8) | b);
|
||||
if (ch < src + srcMaxSize)
|
||||
{
|
||||
uint8 a = *ch++;
|
||||
if (a != '\0')
|
||||
{
|
||||
uint8 b = *ch++;
|
||||
wchar_t codepoint16 = (wchar_t)((a << 8) | b);
|
||||
|
||||
codepoint16 = convert_specific_language_character_to_unicode(languageId, codepoint16);
|
||||
sb.Append(codepoint16);
|
||||
codepoint16 = convert_specific_language_character_to_unicode(languageId, codepoint16);
|
||||
sb.Append(codepoint16);
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
Loading…
Reference in New Issue