OpenRCT2/src/openrct2/localisation/Convert.cpp

125 lines
3.4 KiB
C++

/*****************************************************************************
* Copyright (c) 2014-2024 OpenRCT2 developers
*
* For a complete list of all authors, please refer to contributors.md
* Interested in contributing? Visit https://github.com/OpenRCT2/OpenRCT2
*
* OpenRCT2 is licensed under the GNU General Public License version 3.
*****************************************************************************/
#include "../core/String.hpp"
#include "ConversionTables.h"
#include "Language.h"
#include <limits>
#include <stdexcept>
/**
 * Expands an RCT2-encoded string into a wide string whose elements are still values of the
 * original code page (no Unicode conversion is performed here).
 *
 * Any byte other than 255 becomes one element of the result. A byte of 255 is an escape
 * marker: the two bytes following it are combined, first byte in the high position, into a
 * single element. If the input ends before both of those bytes are present, decoding stops
 * and the incomplete sequence is discarded.
 */
static std::wstring DecodeToWideChar(std::string_view src)
{
    constexpr uint8_t kDoubleByteMarker = 255;

    const auto length = src.size();
    std::wstring decoded;
    decoded.reserve(length);

    auto pos = length - length; // zero, in the same unsigned type as length
    while (pos < length)
    {
        const uint8_t lead = static_cast<uint8_t>(src[pos]);
        pos++;

        if (lead != kDoubleByteMarker)
        {
            // Ordinary single-byte character
            decoded.push_back(lead);
            continue;
        }

        // The marker must be followed by two payload bytes, otherwise the input is truncated
        if (length - pos < 2)
        {
            break;
        }

        const uint8_t high = static_cast<uint8_t>(src[pos]);
        const uint8_t low = static_cast<uint8_t>(src[pos + 1]);
        pos += 2;
        decoded.push_back(static_cast<wchar_t>((high << 8) | low));
    }
    return decoded;
}
/**
 * Decodes an RCT2 string into a plain multi-byte string by removing the escape markers.
 *
 * The input is first decoded with DecodeToWideChar. Each resulting element is then written
 * back as bytes: elements up to 255 produce a single byte, larger elements produce their
 * high byte followed by their low byte.
 */
static std::string DecodeToMultiByte(std::string_view src)
{
    const std::wstring codePoints = DecodeToWideChar(src);

    std::string bytes;
    bytes.reserve(codePoints.size());
    for (const wchar_t codePoint : codePoints)
    {
        // Only double-byte values carry a high byte; the low byte is always written
        if (codePoint > 255)
        {
            bytes.push_back(static_cast<char>((codePoint >> 8) & 0xFF));
        }
        bytes.push_back(static_cast<char>(codePoint & 0xFF));
    }
    return bytes;
}
/**
 * Selects the code page used to interpret strings of the given RCT2 language.
 * Japanese, Simplified Chinese, Korean and Traditional Chinese each map to their own code
 * page constant; every other language id yields CP_1252.
 */
static int32_t GetCodePageForRCT2Language(RCT2LanguageId languageId)
{
    if (languageId == RCT2LanguageId::Japanese)
    {
        return OpenRCT2::CodePage::CP_932;
    }
    if (languageId == RCT2LanguageId::ChineseSimplified)
    {
        return OpenRCT2::CodePage::CP_936;
    }
    if (languageId == RCT2LanguageId::Korean)
    {
        return OpenRCT2::CodePage::CP_949;
    }
    if (languageId == RCT2LanguageId::ChineseTraditional)
    {
        return OpenRCT2::CodePage::CP_950;
    }
    // Everything else shares the default code page
    return OpenRCT2::CodePage::CP_1252;
}
/**
 * Decodes an RCT2 string and converts it to UTF-8 using a per-character mapping function.
 *
 * @param src  The RCT2-encoded input; it is decoded with DecodeToWideChar.
 * @param func Callable invoked once for every decoded element; its result is appended to
 *             the wide string that is finally handed to String::ToUtf8.
 * @return The value returned by String::ToUtf8 for the mapped wide string.
 */
template<typename TConvertFunc> static std::string DecodeConvertWithTable(std::string_view src, TConvertFunc func)
{
    auto sourceChars = DecodeToWideChar(src);
    const auto count = sourceChars.size();

    std::wstring mappedChars;
    mappedChars.reserve(count);
    for (auto index = count - count; index < count; index++)
    {
        // Hand the callable its own copy of the element
        auto sourceChar = sourceChars[index];
        mappedChars.push_back(func(sourceChar));
    }
    return String::ToUtf8(mappedChars);
}
/**
 * Converts an RCT2-encoded string to UTF-8.
 *
 * @param src        The RCT2-encoded input string.
 * @param languageId Language of the string; it determines which code page is assumed.
 * @return The UTF-8 text. Languages that map to a code page other than CP_1252 are decoded
 *         to a multi-byte string and converted via String::ConvertToUtf8; all others go
 *         through the RCT2-specific conversion table.
 */
std::string RCT2StringToUTF8(std::string_view src, RCT2LanguageId languageId)
{
    const auto codePage = GetCodePageForRCT2Language(languageId);
    if (codePage != OpenRCT2::CodePage::CP_1252)
    {
        auto multiByte = DecodeToMultiByte(src);
        return String::ConvertToUtf8(multiByte, codePage);
    }

    // The code page used by RCT2 was not quite 1252 as some codes were used for Polish characters.
    return DecodeConvertWithTable(src, EncodingConvertRCT2ToUnicode);
}