handle utf8 characters in languages, convert appropriately for #319

This commit is contained in:
IntelOrca 2014-08-17 19:20:45 +01:00
parent 83c047e632
commit 92b74a8deb
1 changed files with 34 additions and 7 deletions

View File

@ -52,6 +52,24 @@ char **language_strings = NULL;
static int language_open_file(const char *filename);
static int utf8_get_next(char *char_ptr, char **nextchar_ptr)
{
int result;
int numBytes;
if (!(char_ptr[0] & 0x80)) {
result = char_ptr[0];
numBytes = 1;
} else if (!(char_ptr[0] & 0x20)) {
result = ((char_ptr[0] & 0x1F) << 6) | (char_ptr[1] & 0x3F);
numBytes = 2;
}
if (nextchar_ptr != NULL)
*nextchar_ptr = char_ptr + numBytes;
return result;
}
const char *language_get_string(rct_string_id id)
{
const char *rct = RCT2_ADDRESS(0x009BF2D4, const char*)[id];
@ -114,12 +132,21 @@ static int language_open_file(const char *filename)
for (i = 0; i < language_buffer_size; i++) {
char *src = &language_buffer[i];
// Handle UTF-8
char *srcNext;
int utf8Char = utf8_get_next(src, &srcNext);
i += srcNext - src - 1;
if (utf8Char > 0xFF)
utf8Char = '?';
else if (utf8Char > 0x7F)
utf8Char &= 0xFF;
switch (mode) {
case 0:
// Search for a comment
if (*src == '#') {
if (utf8Char == '#') {
mode = 3;
} else if (*src == ':' && string_no != -1) {
} else if (utf8Char == ':' && string_no != -1) {
// Search for colon
dst = src + 1;
language_strings[string_no] = dst;
@ -134,19 +161,19 @@ static int language_open_file(const char *filename)
break;
case 1:
// Copy string over, stop at line break
if (*src == '{') {
if (utf8Char == '{') {
token = src + 1;
mode = 2;
} else if (*src == '\n' || *src == '\r') {
} else if (utf8Char == '\n' || *src == '\r') {
*dst = 0;
mode = 0;
} else {
*dst++ = *src;
*dst++ = utf8Char;
}
break;
case 2:
// Read token, convert to code
if (*src == '}') {
if (utf8Char == '}') {
int tokenLength = min(src - token, sizeof(tokenBuffer) - 1);
memcpy(tokenBuffer, token, tokenLength);
tokenBuffer[tokenLength] = 0;
@ -158,7 +185,7 @@ static int language_open_file(const char *filename)
}
break;
case 3:
if (*src == '\n' || *src == '\r') {
if (utf8Char == '\n' || utf8Char == '\r') {
mode = 0;
}
}