diff --git a/src/util/stringutil.cpp b/src/util/stringutil.cpp
index c104bd5e..12936343 100644
--- a/src/util/stringutil.cpp
+++ b/src/util/stringutil.cpp
@@ -128,7 +128,7 @@ inline uint utf8_len(ubyte cp) {
     if ((cp & 0xF8) == 0xF0) {
         return 4;
     }
-    return 0;
+    throw std::runtime_error("utf8 decode error");
 }
 
 uint32_t util::decode_utf8(uint& size, const char* chr) {
@@ -156,6 +156,18 @@ size_t util::crop_utf8(std::string_view s, size_t maxSize) {
     return pos;
 }
 
+size_t util::length_utf8(std::string_view s) {
+    size_t length = 0;
+    size_t pos = 0;
+    while (pos < s.length()) {
+        // utf8_len() now throws on its fall-through case instead of returning 0,
+        // so an unrecognized byte cannot leave pos stuck in this loop
+        pos += utf8_len(s[pos]);
+        length++;
+    }
+    return length;
+}
+
 template
 std::string xstr2str_utf8(const std::basic_string& xs) {
     std::vector chars;
diff --git a/src/util/stringutil.hpp b/src/util/stringutil.hpp
index c7bc2fb5..f584a53c 100644
--- a/src/util/stringutil.hpp
+++ b/src/util/stringutil.hpp
@@ -44,6 +44,12 @@ namespace util {
     /// @param maxSize max encoded string length after crop
     /// @return cropped string size (less or equal to maxSize)
     size_t crop_utf8(std::string_view s, size_t maxSize);
+
+    /// @brief Measure utf8-encoded string length
+    /// @param s source encoded string
+    /// @return unicode string length (number of codepoints)
+    /// @throws std::runtime_error on utf8 decode error (unrecognized leading byte)
+    size_t length_utf8(std::string_view s);
 
     bool is_integer(const std::string& text);
     bool is_integer(const std::wstring& text);