OpenRCT2/src/openrct2/core/String.hpp

/*****************************************************************************
 * Copyright (c) 2014-2020 OpenRCT2 developers
 *
 * For a complete list of all authors, please refer to contributors.md
 * Interested in contributing? Visit https://github.com/OpenRCT2/OpenRCT2
 *
 * OpenRCT2 is licensed under the GNU General Public License version 3.
 *****************************************************************************/

#pragma once

#include "../common.h"

#include <cstdarg>
#include <cstddef>
#include <optional>
#include <string>
#include <string_view>
#include <vector>

namespace CODE_PAGE
{
    // windows.h defines CP_UTF8
#undef CP_UTF8

    constexpr int32_t CP_932 = 932;    // ANSI/OEM Japanese; Japanese (Shift-JIS)
    constexpr int32_t CP_936 = 936;    // ANSI/OEM Simplified Chinese (PRC, Singapore); Chinese Simplified (GB2312)
    constexpr int32_t CP_949 = 949;    // ANSI/OEM Korean (Unified Hangul Code)
    constexpr int32_t CP_950 = 950;    // ANSI/OEM Traditional Chinese (Taiwan; Hong Kong SAR, PRC); Chinese Traditional (Big5)
    constexpr int32_t CP_1252 = 1252;  // ANSI Latin 1; Western European (Windows)
    constexpr int32_t CP_UTF8 = 65001; // Unicode (UTF-8)
} // namespace CODE_PAGE

namespace String
{
    constexpr const utf8* Empty = "";

    std::string ToStd(const utf8* str);
    std::string StdFormat_VA(const utf8* format, va_list args);
    std::string StdFormat(const utf8* format, ...);
    std::string ToUtf8(std::wstring_view src);
    std::wstring ToWideChar(std::string_view src);

    /**
     * Creates a string_view from a char pointer with a length up to either the
     * first null terminator or a given maximum length, whatever is smallest.
     */
    std::string_view ToStringView(const char* ch, size_t maxLen);

    bool IsNullOrEmpty(const utf8* str);
    int32_t Compare(const std::string& a, const std::string& b, bool ignoreCase = false);
    int32_t Compare(const utf8* a, const utf8* b, bool ignoreCase = false);
    bool Equals(std::string_view a, std::string_view b, bool ignoreCase = false);
    bool Equals(const std::string& a, const std::string& b, bool ignoreCase = false);
    bool Equals(const utf8* a, const utf8* b, bool ignoreCase = false);
    bool StartsWith(std::string_view str, std::string_view match, bool ignoreCase = false);
    bool EndsWith(std::string_view str, std::string_view match, bool ignoreCase = false);
    size_t IndexOf(const utf8* str, utf8 match, size_t startIndex = 0);
    ptrdiff_t LastIndexOf(const utf8* str, utf8 match);

    /**
     * Gets the length of the given string in codepoints.
     */
    size_t LengthOf(const utf8* str);

    /**
     * Gets the size of the given string in bytes excluding the null terminator.
     */
    size_t SizeOf(const utf8* str);

    utf8* Set(utf8* buffer, size_t bufferSize, const utf8* src);
    utf8* Set(utf8* buffer, size_t bufferSize, const utf8* src, size_t srcSize);
    utf8* Append(utf8* buffer, size_t bufferSize, const utf8* src);
    utf8* Format(utf8* buffer, size_t bufferSize, const utf8* format, ...);
    utf8* Format(const utf8* format, ...);
    utf8* Format_VA(const utf8* format, va_list args);
    utf8* AppendFormat(utf8* buffer, size_t bufferSize, const utf8* format, ...);
    utf8* Duplicate(const std::string& src);
    utf8* Duplicate(const utf8* src);

    /**
     * Helper method to free the string a string pointer points to and set it to a replacement string.
     */
    utf8* DiscardUse(utf8** ptr, utf8* replacement);

    /**
     * Helper method to free the string a string pointer points to and set it to a copy of a replacement string.
     */
    utf8* DiscardDuplicate(utf8** ptr, const utf8* replacement);

    /**
     * Splits the given string by a delimiter and returns the values as a new string array.
     * @returns the number of values.
     */
    std::vector<std::string> Split(std::string_view s, std::string_view delimiter);

    utf8* SkipBOM(utf8* buffer);
    const utf8* SkipBOM(const utf8* buffer);

    size_t GetCodepointLength(codepoint_t codepoint);
    codepoint_t GetNextCodepoint(utf8* ptr, utf8** nextPtr = nullptr);
    codepoint_t GetNextCodepoint(const utf8* ptr, const utf8** nextPtr = nullptr);
    utf8* WriteCodepoint(utf8* dst, codepoint_t codepoint);
    void AppendCodepoint(std::string& str, codepoint_t codepoint);

    bool IsWhiteSpace(codepoint_t codepoint);
    utf8* Trim(utf8* str);
    const utf8* TrimStart(const utf8* str);
    utf8* TrimStart(utf8* buffer, size_t bufferSize, const utf8* src);
    std::string TrimStart(const std::string& s);
    std::string Trim(const std::string& s);

    /**
     * Converts a multi-byte string from one code page to another.
     */
    std::string Convert(std::string_view src, int32_t srcCodePage, int32_t dstCodePage);

    /**
     * Returns an uppercased version of a UTF-8 string.
     */
    std::string ToUpper(std::string_view src);

    template<typename T> std::optional<T> Parse(std::string_view input)
    {
        if (input.size() == 0)
            return std::nullopt;

        T result = 0;
        for (size_t i = 0; i < input.size(); i++)
        {
            auto chr = input[i];
            if (chr >= '0' && chr <= '9')
            {
                auto digit = chr - '0';
                auto last = result;
                result = static_cast<T>((result * 10) + digit);
                if (result <= last)
                {
                    // Overflow, number too large for type
                    return std::nullopt;
                }
            }
            else
            {
                // Bad character
                return std::nullopt;
            }
        }
        return result;
    }

    /**
     * Returns codepoint size or no value if not valid
     */
    constexpr std::optional<int> UTF8GetCodePointSize(std::string_view v)
    {
        if (v.size() >= 1 && !(v[0] & 0x80))
        {
            return { 1 };
        }
        if (v.size() >= 2 && ((v[0] & 0xE0) == 0xC0))
        {
            return { 2 };
        }
        if (v.size() >= 3 && ((v[0] & 0xF0) == 0xE0))
        {
            return { 3 };
        }
        if (v.size() >= 4 && ((v[0] & 0xF8) == 0xF0))
        {
            return { 4 };
        }
        return std::nullopt;
    }

    /**
     * Truncates a string to at most `size` bytes,
     * making sure not to cut in the middle of a sequence.
     */
    std::string_view UTF8Truncate(std::string_view v, size_t size);

    // Escapes special characters in a string to the percentage equivalent that can be used in URLs.
    std::string URLEncode(std::string_view value);
} // namespace String

class CodepointView
{
private:
    std::string_view _str;

public:
    class iterator
    {
    private:
        std::string_view _str;
        size_t _index;

    public:
        iterator(std::string_view str, size_t index)
            : _str(str)
            , _index(index)
        {
        }

        bool operator==(const iterator& rhs) const
        {
            return _index == rhs._index;
        }
        bool operator!=(const iterator& rhs) const
        {
            return _index != rhs._index;
        }
        char32_t operator*() const
        {
            return GetNextCodepoint(&_str[_index], nullptr);
        }
        iterator& operator++()
        {
            if (_index < _str.size())
            {
                const utf8* nextch;
                GetNextCodepoint(&_str[_index], &nextch);
                _index = nextch - _str.data();
            }
            return *this;
        }
        iterator operator++(int)
        {
            auto result = *this;
            if (_index < _str.size())
            {
                const utf8* nextch;
                GetNextCodepoint(&_str[_index], &nextch);
                _index = nextch - _str.data();
            }
            return result;
        }

        size_t GetIndex() const
        {
            return _index;
        }

        static char32_t GetNextCodepoint(const char* ch, const char** next);
    };

    CodepointView(std::string_view str)
        : _str(str)
    {
    }

    iterator begin() const
    {
        return iterator(_str, 0);
    }

    iterator end() const
    {
        return iterator(_str, _str.size());
    }
};