OpenRCT2/src/openrct2/localisation/Convert.cpp

164 lines
4.6 KiB
C++

/*****************************************************************************
* Copyright (c) 2014-2020 OpenRCT2 developers
*
* For a complete list of all authors, please refer to contributors.md
* Interested in contributing? Visit https://github.com/OpenRCT2/OpenRCT2
*
* OpenRCT2 is licensed under the GNU General Public License version 3.
*****************************************************************************/
#include "../core/String.hpp"
#include "ConversionTables.h"
#include "Language.h"
#include <algorithm>
#include <limits>
#include <stdexcept>
/**
 * Expands an RCT2 string into a wide string whose elements are still values of the
 * original code page (no character set conversion is performed here).
 *
 * In an RCT2 string a byte of 255 announces a two-byte code point: the following
 * two bytes are combined big-endian (first byte is the high half). Every other byte
 * is copied through as a single element.
 *
 * @param src The raw RCT2 encoded bytes.
 * @return One wide character per decoded code point. If the input ends in the middle
 *         of a two-byte sequence, decoding stops and the incomplete sequence is dropped.
 */
static std::wstring DecodeToWideChar(std::string_view src)
{
    constexpr uint8_t kDoubleByteMarker = 255;

    std::wstring result;
    result.reserve(src.size());

    const size_t length = src.size();
    size_t pos = 0;
    while (pos < length)
    {
        const auto lead = static_cast<uint8_t>(src[pos++]);
        if (lead != kDoubleByteMarker)
        {
            // Plain single-byte character
            result.push_back(lead);
            continue;
        }

        // A marker must be followed by two payload bytes; give up if either is missing
        if (length - pos < 2)
        {
            break;
        }

        const auto high = static_cast<uint8_t>(src[pos]);
        const auto low = static_cast<uint8_t>(src[pos + 1]);
        pos += 2;
        result.push_back(static_cast<wchar_t>((high << 8) | low));
    }
    return result;
}
/**
 * Strips the RCT2 two-byte markers from a string, yielding a plain multi-byte
 * string in the original code page.
 *
 * The input is first decoded with DecodeToWideChar. Each resulting value above 255
 * is written as two bytes (high byte first); every other value is written as one byte.
 *
 * @param src The raw RCT2 encoded bytes.
 * @return The multi-byte string without 255 marker bytes.
 */
static std::string DecodeToMultiByte(std::string_view src)
{
    const std::wstring codePoints = DecodeToWideChar(src);

    std::string bytes;
    bytes.reserve(codePoints.size());
    for (const wchar_t codePoint : codePoints)
    {
        if (codePoint > 255)
        {
            // Double-byte character: emit the lead byte before the trail byte
            bytes.push_back((codePoint >> 8) & 0xFF);
        }
        bytes.push_back(codePoint & 0xFF);
    }
    return bytes;
}
/**
* Encodes a UTF-8 string as an RCT2 string.
*/
static std::string Encode(const std::string& src)
{
std::string dst;
const utf8* ch = src.data();
int32_t codepoint;
while ((codepoint = utf8_get_next(ch, &ch)) != 0)
{
codepoint = encoding_convert_unicode_to_rct2(codepoint);
if (codepoint <= std::numeric_limits<uint8_t>::max())
{
dst.push_back(codepoint);
}
else if (codepoint <= std::numeric_limits<uint16_t>::max())
{
dst.push_back(static_cast<char>(static_cast<uint8_t>(0xFF)));
dst.push_back((codepoint >> 8) & 0xFF);
dst.push_back(codepoint & 0xFF);
}
else
{
// RCT2 strings do not support code points greater than 65535, replace them with '?'
dst.push_back('?');
}
}
return dst;
}
/**
 * Maps an RCT2 language identifier to the code page its strings are stored in.
 *
 * @param languageId The RCT2 language to look up.
 * @return CP_932 for Japanese, CP_936 for Simplified Chinese, CP_949 for Korean,
 *         CP_950 for Traditional Chinese and CP_1252 for every other language.
 */
static int32_t GetCodePageForRCT2Language(RCT2LanguageId languageId)
{
    if (languageId == RCT2LanguageId::Japanese)
    {
        return CODE_PAGE::CP_932;
    }
    if (languageId == RCT2LanguageId::ChineseSimplified)
    {
        return CODE_PAGE::CP_936;
    }
    if (languageId == RCT2LanguageId::Korean)
    {
        return CODE_PAGE::CP_949;
    }
    if (languageId == RCT2LanguageId::ChineseTraditional)
    {
        return CODE_PAGE::CP_950;
    }
    // Everything else falls back to the 1252 code page
    return CODE_PAGE::CP_1252;
}
/**
 * Decodes an RCT2 string and converts it to UTF-8 using a per-character mapping.
 *
 * The input is decoded with DecodeToWideChar, each decoded value is replaced by
 * the result of calling func on it, and the mapped wide string is handed to
 * String::ToUtf8.
 *
 * @param src  The raw RCT2 encoded bytes.
 * @param func Callable invoked once per decoded character; its result replaces that character.
 * @return The UTF-8 string produced by String::ToUtf8.
 */
template<typename TConvertFunc>
static std::string DecodeConvertWithTable(std::string_view src, TConvertFunc func)
{
    std::wstring mapped = DecodeToWideChar(src);
    for (auto& wideChar : mapped)
    {
        // Each element is mapped independently, so the buffer can be rewritten in place
        wideChar = func(wideChar);
    }
    return String::ToUtf8(mapped);
}
/**
 * Converts an RCT2 encoded string to UTF-8.
 *
 * @param src        The raw RCT2 encoded bytes.
 * @param languageId The RCT2 language the string was written in; selects the source code page.
 * @return The UTF-8 representation of the string.
 */
std::string rct2_to_utf8(std::string_view src, RCT2LanguageId languageId)
{
    const auto sourceCodePage = GetCodePageForRCT2Language(languageId);
    if (sourceCodePage != CODE_PAGE::CP_1252)
    {
        // Multi-byte code pages: strip the RCT2 markers, then convert the code page to UTF-8
        const auto multiByte = DecodeToMultiByte(src);
        return String::Convert(multiByte, sourceCodePage, CODE_PAGE::CP_UTF8);
    }

    // The code page used by RCT2 was not quite 1252 as some codes were used for Polish characters,
    // so the RCT2 specific conversion table is used instead of a generic code page conversion.
    return DecodeConvertWithTable(src, encoding_convert_rct2_to_unicode);
}
/**
 * Converts a UTF-8 string to an RCT2 encoded string.
 *
 * NOTE: This is only used for SC6 / SV6 files which don't store the language identifier.
 *       Because of this, we can only store in RCT2's CP_1252 format. We can preserve some
 *       unicode characters, but only those between 256 and 65535.
 *
 * @param src The UTF-8 text to encode.
 * @return The RCT2 encoded bytes.
 */
std::string utf8_to_rct2(std::string_view src)
{
    // Encode takes a std::string, so materialise the view into an owned copy first
    const std::string ownedCopy(src);
    return Encode(ownedCopy);
}