mirror of https://github.com/OpenTTD/OpenTTD.git
(svn r9354) [0.5] -Backport from trunk (r8975, r9003, r9011, r9012):
-Regression: [win32] Possible buffer overflow if Unicode text is pasted into an input box and needs trimming. (r8975) -Codechange: Introduce a function Utf8PrevCharLen that finds the starting character of a UTF-8 sequence from a given position and returns the length to the first UTF-8 encoding byte of that sequence. (r9003) -Codechange: Rework Utf8PrevChar so that it returns a pointer to the previous UTF-8 character's first byte instead of a byte-length offset (r9011) -Fix: When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (e.g. IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'. (r9012) -Codechange: Add a function Utf8TrimString() that properly trims a string at a UTF-8 encoding separation instead of somewhere in the wild (and use it in the chat area) (r9012)
This commit is contained in:
parent
140abde4ef
commit
3158cdaa7a
19
gfx.c
19
gfx.c
|
@ -270,7 +270,7 @@ static int TruncateString(char *str, int maxw)
|
||||||
if (w >= maxw) {
|
if (w >= maxw) {
|
||||||
// string got too big... insert dotdotdot
|
// string got too big... insert dotdotdot
|
||||||
ddd_pos[0] = ddd_pos[1] = ddd_pos[2] = '.';
|
ddd_pos[0] = ddd_pos[1] = ddd_pos[2] = '.';
|
||||||
ddd_pos[3] = 0;
|
ddd_pos[3] = '\0';
|
||||||
return ddd_w;
|
return ddd_w;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -410,11 +410,12 @@ uint32 FormatStringLinebreaks(char *str, int maxw)
|
||||||
for (;;) {
|
for (;;) {
|
||||||
char *last_space = NULL;
|
char *last_space = NULL;
|
||||||
int w = 0;
|
int w = 0;
|
||||||
|
char *s;
|
||||||
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
WChar c = Utf8Consume((const char **)&str);
|
WChar c = Utf8Consume((const char **)&str);
|
||||||
/* whitespace is where we will insert the line-break */
|
/* whitespace is where we will insert the line-break */
|
||||||
if (c == ' ') last_space = str;
|
if (IsWhitespace(c)) last_space = str;
|
||||||
|
|
||||||
if (IsPrintable(c)) {
|
if (IsPrintable(c)) {
|
||||||
w += GetCharacterWidth(size, c);
|
w += GetCharacterWidth(size, c);
|
||||||
|
@ -425,7 +426,7 @@ uint32 FormatStringLinebreaks(char *str, int maxw)
|
||||||
* 2. In all other cases force a linebreak at the last seen whitespace */
|
* 2. In all other cases force a linebreak at the last seen whitespace */
|
||||||
if (w > maxw) {
|
if (w > maxw) {
|
||||||
if (last_space == NULL) {
|
if (last_space == NULL) {
|
||||||
str[-1] = '\0';
|
*Utf8PrevChar(str) = '\0';
|
||||||
return num + (size << 16);
|
return num + (size << 16);
|
||||||
}
|
}
|
||||||
str = last_space;
|
str = last_space;
|
||||||
|
@ -443,9 +444,17 @@ uint32 FormatStringLinebreaks(char *str, int maxw)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
end_of_inner_loop:
|
end_of_inner_loop:
|
||||||
/* string didn't fit on line, so 'dummy' terminate and increase linecount */
|
/* String didn't fit on line (or a '\n' was encountered), so 'dummy' terminate
|
||||||
|
* and increase linecount. We use Utf8PrevChar() as also non 1 char long
|
||||||
|
* whitespace separators are supported */
|
||||||
num++;
|
num++;
|
||||||
str[-1] = '\0';
|
s = Utf8PrevChar(str);
|
||||||
|
*s++ = '\0';
|
||||||
|
|
||||||
|
/* In which case (see above) we will shift remainder to left and close the gap */
|
||||||
|
if (str - s >= 1) {
|
||||||
|
for (; str[-1] != '\0';) *s++ = *str++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
25
misc_gui.c
25
misc_gui.c
|
@ -794,21 +794,21 @@ static void DelChar(Textbuf *tb, bool backspace)
|
||||||
WChar c;
|
WChar c;
|
||||||
uint width;
|
uint width;
|
||||||
size_t len;
|
size_t len;
|
||||||
|
char *s = tb->buf + tb->caretpos;
|
||||||
|
|
||||||
if (backspace) {
|
if (backspace) s = Utf8PrevChar(s);
|
||||||
do {
|
|
||||||
tb->caretpos--;
|
|
||||||
} while (IsUtf8Part(*(tb->buf + tb->caretpos)));
|
|
||||||
}
|
|
||||||
|
|
||||||
len = Utf8Decode(&c, tb->buf + tb->caretpos);
|
len = Utf8Decode(&c, s);
|
||||||
width = GetCharacterWidth(FS_NORMAL, c);
|
width = GetCharacterWidth(FS_NORMAL, c);
|
||||||
|
|
||||||
tb->width -= width;
|
tb->width -= width;
|
||||||
if (backspace) tb->caretxoffs -= width;
|
if (backspace) {
|
||||||
|
tb->caretpos -= len;
|
||||||
|
tb->caretxoffs -= width;
|
||||||
|
}
|
||||||
|
|
||||||
/* Move the remaining characters over the marker */
|
/* Move the remaining characters over the marker */
|
||||||
memmove(tb->buf + tb->caretpos, tb->buf + tb->caretpos + len, tb->length - tb->caretpos - len + 1);
|
memmove(s, s + len, tb->length - (s - tb->buf) - len + 1);
|
||||||
tb->length -= len;
|
tb->length -= len;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -881,12 +881,9 @@ bool MoveTextBufferPos(Textbuf *tb, int navmode)
|
||||||
case WKC_LEFT:
|
case WKC_LEFT:
|
||||||
if (tb->caretpos != 0) {
|
if (tb->caretpos != 0) {
|
||||||
WChar c;
|
WChar c;
|
||||||
|
const char *s = Utf8PrevChar(tb->buf + tb->caretpos);
|
||||||
do {
|
Utf8Decode(&c, s);
|
||||||
tb->caretpos--;
|
tb->caretpos = s - tb->buf; // -= (tb->buf + tb->caretpos - s)
|
||||||
} while (IsUtf8Part(*(tb->buf + tb->caretpos)));
|
|
||||||
|
|
||||||
Utf8Decode(&c, tb->buf + tb->caretpos);
|
|
||||||
tb->caretxoffs -= GetCharacterWidth(FS_NORMAL, c);
|
tb->caretxoffs -= GetCharacterWidth(FS_NORMAL, c);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
|
26
string.c
26
string.c
|
@ -269,3 +269,29 @@ size_t Utf8Encode(char *buf, WChar c)
|
||||||
*buf = '?';
|
*buf = '?';
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Trim a UTF-8 string in place so that it ends on a complete encoded character and fits in maxlen bytes
|
||||||
|
* @param s NUL-terminated string to trim; a '\0' is written into it at the cut position
|
||||||
|
* @param maxlen the maximum size in bytes of the result, counting the terminating '\0'
|
||||||
|
* @return the new length in bytes of the string, excluding the terminating '\0' (e.g. strlen(new_string))
|
||||||
|
* @note Only the lead byte of each sequence is inspected (via Utf8EncodedCharLen); the walk stops at the first lead byte reported as invalid
|
||||||
|
*/
|
||||||
|
size_t Utf8TrimString(char *s, size_t maxlen)
|
||||||
|
{
|
||||||
|
size_t length = 0; /* bytes accepted so far */
|
||||||
|
const char *ptr = strchr(s, '\0'); /* position of the original terminator */
|
||||||
|
while (*s != '\0') {
|
||||||
|
size_t len = Utf8EncodedCharLen(*s);
|
||||||
|
if (len == 0) break; // invalid lead byte: cut the string here
|

||||||
|
/* Stop when the next character would not leave room for the '\0'
|
||||||
|
* within maxlen, or when its announced length runs past the original terminator (truncated last sequence) */
|
||||||
|
if (length + len >= maxlen || (s + len > ptr)) break;
|
||||||
|
s += len;
|
||||||
|
length += len;
|
||||||
|
}
|
||||||
|

||||||
|
*s = '\0'; /* terminate at the last accepted character boundary */
|
||||||
|
return length;
|
||||||
|
}
|
||||||
|
|
47
string.h
47
string.h
|
@ -71,6 +71,7 @@ bool IsValidChar(WChar key, CharSetFilter afilter);
|
||||||
|
|
||||||
size_t Utf8Decode(WChar *c, const char *s);
|
size_t Utf8Decode(WChar *c, const char *s);
|
||||||
size_t Utf8Encode(char *buf, WChar c);
|
size_t Utf8Encode(char *buf, WChar c);
|
||||||
|
size_t Utf8TrimString(char *s, size_t maxlen);
|
||||||
|
|
||||||
|
|
||||||
static inline WChar Utf8Consume(const char **s)
|
static inline WChar Utf8Consume(const char **s)
|
||||||
|
@ -97,12 +98,43 @@ static inline size_t Utf8CharLen(WChar c)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the byte length of a UTF-8 encoded character, judged from a single char. This
|
||||||
|
* char should be the first byte of the UTF-8 encoding. If none of the four lead-byte tests
|
||||||
|
* below matches, the return value is 0. NOTE(review): the bit patterns quoted below assume GB(x, s, n) yields n bits of x starting at bit s; GB is defined elsewhere, confirm
|
||||||
|
*/
|
||||||
|
static inline size_t Utf8EncodedCharLen(char c)
|
||||||
|
{
|
||||||
|
if (GB(c, 3, 5) == 0x1E) return 4; /* presumably lead byte 11110xxx */
|
||||||
|
if (GB(c, 4, 4) == 0x0E) return 3; /* presumably lead byte 1110xxxx */
|
||||||
|
if (GB(c, 5, 3) == 0x06) return 2; /* presumably lead byte 110xxxxx */
|
||||||
|
if (GB(c, 7, 1) == 0x00) return 1; /* presumably 0xxxxxxx, a single-byte character */
|
||||||
|

||||||
|
/* No test matched: not a valid UTF-8 start byte */
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/* Check if the given byte is a non-leading part of a UTF-8 sequence (GB(c, 6, 2) == 2; presumably the top two bits are 10, confirm against GB) */
|
/* Check if the given byte is a non-leading part of a UTF-8 sequence (GB(c, 6, 2) == 2; presumably the top two bits are 10, confirm against GB) */
|
||||||
static inline bool IsUtf8Part(char c)
|
static inline bool IsUtf8Part(char c)
|
||||||
{
|
{
|
||||||
return GB(c, 6, 2) == 2;
|
return GB(c, 6, 2) == 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieve the previous UNICODE character in a UTF-8 encoded string by stepping backwards from 's'.
|
||||||
|
* @param s char pointer pointing to (the first char of) the next character; at least one byte before it must be readable
|
||||||
|
* @returns a pointer in 's' to the previous UNICODE character's first byte (the const qualifier of 's' is cast away)
|
||||||
|
* @note The function should not be used to determine the length of the previous
|
||||||
|
* encoded char because it might be an invalid/corrupt start-sequence. The backwards scan has no lower bound: it only stops at a byte for which IsUtf8Part() is false
|
||||||
|
*/
|
||||||
|
static inline char *Utf8PrevChar(const char *s)
|
||||||
|
{
|
||||||
|
const char *ret = s;
|
||||||
|
while (IsUtf8Part(*--ret)); /* always steps back at least one byte, then skips bytes flagged by IsUtf8Part() */
|
||||||
|
return (char*)ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static inline bool IsPrintable(WChar c)
|
static inline bool IsPrintable(WChar c)
|
||||||
{
|
{
|
||||||
|
@ -112,5 +144,20 @@ static inline bool IsPrintable(WChar c)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Check whether a UNICODE character is one of the whitespace characters recognised here
|
||||||
|
* @param c UNICODE character (code point) to check
|
||||||
|
* @return true if 'c' is one of the three code points compared below, false for every other value
|
||||||
|
* @see http://www.fileformat.info/info/unicode/category/Zs/list.htm
|
||||||
|
*/
|
||||||
|
static inline bool IsWhitespace(WChar c)
|
||||||
|
{
|
||||||
|
return
|
||||||
|
c == 0x0020 /* SPACE */ ||
|
||||||
|
c == 0x00A0 /* NO-BREAK SPACE */ ||
|
||||||
|
c == 0x3000 /* IDEOGRAPHIC SPACE */
|
||||||
|
;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
#endif /* STRING_H */
|
#endif /* STRING_H */
|
||||||
|
|
|
@ -27,7 +27,7 @@ typedef struct TextEffect {
|
||||||
uint32 params_2;
|
uint32 params_2;
|
||||||
} TextEffect;
|
} TextEffect;
|
||||||
|
|
||||||
#define MAX_TEXTMESSAGE_LENGTH 150
|
#define MAX_TEXTMESSAGE_LENGTH 200
|
||||||
|
|
||||||
typedef struct TextMessage {
|
typedef struct TextMessage {
|
||||||
char message[MAX_TEXTMESSAGE_LENGTH];
|
char message[MAX_TEXTMESSAGE_LENGTH];
|
||||||
|
@ -77,6 +77,9 @@ void CDECL AddTextMessage(uint16 color, uint8 duration, const char *message, ...
|
||||||
vsnprintf(buf, lengthof(buf), message, va);
|
vsnprintf(buf, lengthof(buf), message, va);
|
||||||
va_end(va);
|
va_end(va);
|
||||||
|
|
||||||
|
|
||||||
|
Utf8TrimString(buf, MAX_TEXTMESSAGE_LENGTH);
|
||||||
|
|
||||||
/* Force linebreaks for strings that are too long */
|
/* Force linebreaks for strings that are too long */
|
||||||
lines = GB(FormatStringLinebreaks(buf, _textmsg_box.width - 8), 0, 16) + 1;
|
lines = GB(FormatStringLinebreaks(buf, _textmsg_box.width - 8), 0, 16) + 1;
|
||||||
if (lines >= MAX_CHAT_MESSAGES) return;
|
if (lines >= MAX_CHAT_MESSAGES) return;
|
||||||
|
|
11
win32.c
11
win32.c
|
@ -995,16 +995,18 @@ bool InsertTextBufferClipboard(Textbuf *tb)
|
||||||
width = length = 0;
|
width = length = 0;
|
||||||
|
|
||||||
for (ptr = utf8_buf; (c = Utf8Consume(&ptr)) != '\0';) {
|
for (ptr = utf8_buf; (c = Utf8Consume(&ptr)) != '\0';) {
|
||||||
|
size_t len;
|
||||||
byte charwidth;
|
byte charwidth;
|
||||||
|
|
||||||
if (!IsPrintable(c)) break;
|
if (!IsPrintable(c)) break;
|
||||||
if (tb->length + length >= tb->maxlength - 1) break;
|
|
||||||
charwidth = GetCharacterWidth(FS_NORMAL, c);
|
|
||||||
|
|
||||||
|
len = Utf8CharLen(c);
|
||||||
|
if (tb->length + length >= tb->maxlength - (uint16)len) break;
|
||||||
|
|
||||||
|
charwidth = GetCharacterWidth(FS_NORMAL, c);
|
||||||
if (tb->maxwidth != 0 && width + tb->width + charwidth > tb->maxwidth) break;
|
if (tb->maxwidth != 0 && width + tb->width + charwidth > tb->maxwidth) break;
|
||||||
|
|
||||||
width += charwidth;
|
width += charwidth;
|
||||||
length += Utf8CharLen(c);
|
length += len;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (length == 0) return false;
|
if (length == 0) return false;
|
||||||
|
@ -1016,6 +1018,7 @@ bool InsertTextBufferClipboard(Textbuf *tb)
|
||||||
|
|
||||||
tb->length += length;
|
tb->length += length;
|
||||||
tb->caretpos += length;
|
tb->caretpos += length;
|
||||||
|
assert(tb->length < tb->maxlength);
|
||||||
tb->buf[tb->length] = '\0'; // terminating zero
|
tb->buf[tb->length] = '\0'; // terminating zero
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
|
Loading…
Reference in New Issue