From 3158cdaa7aa8d38d8b8021af085f48034a745352 Mon Sep 17 00:00:00 2001 From: glx Date: Mon, 19 Mar 2007 21:42:05 +0000 Subject: [PATCH] (svn r9354) [0.5] -Backport from trunk (r8975, r9003, r9011, r9012): -Regression: [win32] Possible buffer overflow if Unicode text is pasted into an input box and needs trimming. (r8975) -Codechange: Introduce a function Utf8PrevCharLen that finds the starting character of a UTF-8 sequence from a given position and returns the length to the first UTF-8 encoding byte of that sequence. (r9003) -Codechange: Rework Utf8PrevChar so that it returns a pointer to the previous UTF8 character's first byte instead of a byte-length offset (r9011) -Fix: When cutting strings into multiple lines also take into consideration whitespace characters of more than 1 byte length (e.g. IDEOGRAPHIC SPACE, IsWhitespace() function). When trimming such strings, account for multiple-byte long sequences so use *Utf8PrevChar(v) = '\0'. (r9012) -Codechange: Add a function Utf8TrimString() that properly trims a string to a UTF8 encoding separation instead of somewhere in the wild (and use it in the chat area) (r9012) --- gfx.c | 19 ++++++++++++++----- misc_gui.c | 25 +++++++++++-------------- string.c | 26 ++++++++++++++++++++++++++ string.h | 47 +++++++++++++++++++++++++++++++++++++++++++++++ texteff.c | 5 ++++- win32.c | 11 +++++++---- 6 files changed, 109 insertions(+), 24 deletions(-) diff --git a/gfx.c b/gfx.c index d663437de3..b3b6bc8d8f 100644 --- a/gfx.c +++ b/gfx.c @@ -270,7 +270,7 @@ static int TruncateString(char *str, int maxw) if (w >= maxw) { // string got too big... 
insert dotdotdot ddd_pos[0] = ddd_pos[1] = ddd_pos[2] = '.'; - ddd_pos[3] = 0; + ddd_pos[3] = '\0'; return ddd_w; } } else { @@ -410,11 +410,12 @@ uint32 FormatStringLinebreaks(char *str, int maxw) for (;;) { char *last_space = NULL; int w = 0; + char *s; for (;;) { WChar c = Utf8Consume((const char **)&str); /* whitespace is where we will insert the line-break */ - if (c == ' ') last_space = str; + if (IsWhitespace(c)) last_space = str; if (IsPrintable(c)) { w += GetCharacterWidth(size, c); @@ -425,7 +426,7 @@ uint32 FormatStringLinebreaks(char *str, int maxw) * 2. In all other cases force a linebreak at the last seen whitespace */ if (w > maxw) { if (last_space == NULL) { - str[-1] = '\0'; + *Utf8PrevChar(str) = '\0'; return num + (size << 16); } str = last_space; @@ -443,9 +444,17 @@ uint32 FormatStringLinebreaks(char *str, int maxw) } } end_of_inner_loop: - /* string didn't fit on line, so 'dummy' terminate and increase linecount */ + /* String didn't fit on line (or a '\n' was encountered), so 'dummy' terminate + * and increase linecount. 
We use Utf8PrevChar() as also non 1 char long + * whitespace seperators are supported */ num++; - str[-1] = '\0'; + s = Utf8PrevChar(str); + *s++ = '\0'; + + /* In which case (see above) we will shift remainder to left and close the gap */ + if (str - s >= 1) { + for (; str[-1] != '\0';) *s++ = *str++; + } } } diff --git a/misc_gui.c b/misc_gui.c index 93ee8fa200..5a93e1b63b 100644 --- a/misc_gui.c +++ b/misc_gui.c @@ -794,21 +794,21 @@ static void DelChar(Textbuf *tb, bool backspace) WChar c; uint width; size_t len; + char *s = tb->buf + tb->caretpos; - if (backspace) { - do { - tb->caretpos--; - } while (IsUtf8Part(*(tb->buf + tb->caretpos))); - } + if (backspace) s = Utf8PrevChar(s); - len = Utf8Decode(&c, tb->buf + tb->caretpos); + len = Utf8Decode(&c, s); width = GetCharacterWidth(FS_NORMAL, c); tb->width -= width; - if (backspace) tb->caretxoffs -= width; + if (backspace) { + tb->caretpos -= len; + tb->caretxoffs -= width; + } /* Move the remaining characters over the marker */ - memmove(tb->buf + tb->caretpos, tb->buf + tb->caretpos + len, tb->length - tb->caretpos - len + 1); + memmove(s, s + len, tb->length - (s - tb->buf) - len + 1); tb->length -= len; } @@ -881,12 +881,9 @@ bool MoveTextBufferPos(Textbuf *tb, int navmode) case WKC_LEFT: if (tb->caretpos != 0) { WChar c; - - do { - tb->caretpos--; - } while (IsUtf8Part(*(tb->buf + tb->caretpos))); - - Utf8Decode(&c, tb->buf + tb->caretpos); + const char *s = Utf8PrevChar(tb->buf + tb->caretpos); + Utf8Decode(&c, s); + tb->caretpos = s - tb->buf; // -= (tb->buf + tb->caretpos - s) tb->caretxoffs -= GetCharacterWidth(FS_NORMAL, c); return true; diff --git a/string.c b/string.c index c990ddd647..c99d5c6780 100644 --- a/string.c +++ b/string.c @@ -269,3 +269,29 @@ size_t Utf8Encode(char *buf, WChar c) *buf = '?'; return 1; } + +/** + * Properly terminate an UTF8 string to some maximum length + * @param s string to check if it needs additional trimming + * @param maxlen the maximum length the buffer can have. 
+ * @return the new length in bytes of the string (eg. strlen(new_string)) + * @NOTE maxlen is the string length _INCLUDING_ the terminating '\0' + */ +size_t Utf8TrimString(char *s, size_t maxlen) +{ + size_t length = 0; + const char *ptr = strchr(s, '\0'); + while (*s != '\0') { + size_t len = Utf8EncodedCharLen(*s); + if (len == 0) break; // invalid encoding + + /* Take care when a hard cutoff was made for the string and + * the last UTF8 sequence is invalid */ + if (length + len >= maxlen || (s + len > ptr)) break; + s += len; + length += len; + } + + *s = '\0'; + return length; +} diff --git a/string.h b/string.h index c3bc45280f..7d8fda209a 100644 --- a/string.h +++ b/string.h @@ -71,6 +71,7 @@ bool IsValidChar(WChar key, CharSetFilter afilter); size_t Utf8Decode(WChar *c, const char *s); size_t Utf8Encode(char *buf, WChar c); +size_t Utf8TrimString(char *s, size_t maxlen); static inline WChar Utf8Consume(const char **s) @@ -97,12 +98,43 @@ static inline size_t Utf8CharLen(WChar c) } +/** + * Return the length of an UTF-8 encoded value based on a single char. This + * char should be the first byte of the UTF-8 encoding. If not, or encoding + * is invalid, return value is 0 + */ +static inline size_t Utf8EncodedCharLen(char c) +{ + if (GB(c, 3, 5) == 0x1E) return 4; + if (GB(c, 4, 4) == 0x0E) return 3; + if (GB(c, 5, 3) == 0x06) return 2; + if (GB(c, 7, 1) == 0x00) return 1; + + /* Invalid UTF8 start encoding */ + return 0; +} + + /* Check if the given character is part of a UTF8 sequence */ static inline bool IsUtf8Part(char c) { return GB(c, 6, 2) == 2; } +/** + * Retrieve the previous UNICODE character in an UTF-8 encoded string. 
+ * @param s char pointer pointing to (the first char of) the next character + * @returns a pointer in 's' to the previous UNICODE character's first byte + * @note The function should not be used to determine the length of the previous + * encoded char because it might be an invalid/corrupt start-sequence + */ +static inline char *Utf8PrevChar(const char *s) +{ + const char *ret = s; + while (IsUtf8Part(*--ret)); + return (char*)ret; +} + static inline bool IsPrintable(WChar c) { @@ -112,5 +144,20 @@ static inline bool IsPrintable(WChar c) return true; } +/** + * Check whether UNICODE character is whitespace or not + * @param c UNICODE character to check + * @return a boolean value whether 'c' is a whitespace character or not + * @see http://www.fileformat.info/info/unicode/category/Zs/list.htm + */ +static inline bool IsWhitespace(WChar c) +{ + return + c == 0x0020 /* SPACE */ || + c == 0x00A0 /* NO-BREAK SPACE */ || + c == 0x3000 /* IDEOGRAPHIC SPACE */ + ; +} + #endif /* STRING_H */ diff --git a/texteff.c b/texteff.c index bddbb11a6f..945d66112b 100644 --- a/texteff.c +++ b/texteff.c @@ -27,7 +27,7 @@ typedef struct TextEffect { uint32 params_2; } TextEffect; -#define MAX_TEXTMESSAGE_LENGTH 150 +#define MAX_TEXTMESSAGE_LENGTH 200 typedef struct TextMessage { char message[MAX_TEXTMESSAGE_LENGTH]; @@ -77,6 +77,9 @@ void CDECL AddTextMessage(uint16 color, uint8 duration, const char *message, ... 
vsnprintf(buf, lengthof(buf), message, va); va_end(va); + + Utf8TrimString(buf, MAX_TEXTMESSAGE_LENGTH); + /* Force linebreaks for strings that are too long */ lines = GB(FormatStringLinebreaks(buf, _textmsg_box.width - 8), 0, 16) + 1; if (lines >= MAX_CHAT_MESSAGES) return; diff --git a/win32.c b/win32.c index bf4ec84577..2859b5dc13 100644 --- a/win32.c +++ b/win32.c @@ -995,16 +995,18 @@ bool InsertTextBufferClipboard(Textbuf *tb) width = length = 0; for (ptr = utf8_buf; (c = Utf8Consume(&ptr)) != '\0';) { + size_t len; byte charwidth; - if (!IsPrintable(c)) break; - if (tb->length + length >= tb->maxlength - 1) break; - charwidth = GetCharacterWidth(FS_NORMAL, c); + len = Utf8CharLen(c); + if (tb->length + length >= tb->maxlength - (uint16)len) break; + + charwidth = GetCharacterWidth(FS_NORMAL, c); if (tb->maxwidth != 0 && width + tb->width + charwidth > tb->maxwidth) break; width += charwidth; - length += Utf8CharLen(c); + length += len; } if (length == 0) return false; @@ -1016,6 +1018,7 @@ bool InsertTextBufferClipboard(Textbuf *tb) tb->length += length; tb->caretpos += length; + assert(tb->length < tb->maxlength); tb->buf[tb->length] = '\0'; // terminating zero return true;