Fix Unicode escape sequences in string literals
This commit is contained in:
parent
8b38d57966
commit
28c821006a
@ -32,7 +32,7 @@ protected:
|
||||
void goBack(size_t count = 1);
|
||||
void reset();
|
||||
|
||||
int64_t parseSimpleInt(int base);
|
||||
int64_t parseSimpleInt(int base, size_t maxLength = 0xFFFFFFFF);
|
||||
dv::value parseNumber(int sign);
|
||||
dv::value parseNumber();
|
||||
StringT parseString(CharT chr, bool closeRequired = true);
|
||||
|
||||
@ -349,7 +349,10 @@ std::basic_string<CharT> BasicParser<CharT>::parseXmlName() {
|
||||
}
|
||||
|
||||
template <typename CharT>
|
||||
int64_t BasicParser<CharT>::parseSimpleInt(int base) {
|
||||
int64_t BasicParser<CharT>::parseSimpleInt(int base, size_t maxLength) {
|
||||
if (maxLength == 0) return 0;
|
||||
|
||||
size_t start = pos;
|
||||
CharT c = peek();
|
||||
int index = hexchar2int(c);
|
||||
if (index == -1 || index >= base) {
|
||||
@ -357,7 +360,7 @@ int64_t BasicParser<CharT>::parseSimpleInt(int base) {
|
||||
}
|
||||
int64_t value = index;
|
||||
pos++;
|
||||
while (hasNext()) {
|
||||
while (hasNext() && pos - start < maxLength) {
|
||||
c = source[pos];
|
||||
while (c == '_') {
|
||||
c = source[++pos];
|
||||
@ -476,7 +479,7 @@ std::basic_string<CharT> BasicParser<CharT>::parseString(
|
||||
continue;
|
||||
}
|
||||
if (c == 'u' || c == 'x') {
|
||||
int codepoint = parseSimpleInt(16);
|
||||
int codepoint = parseSimpleInt(16, c == 'u' ? 4 : 2);
|
||||
ubyte bytes[4];
|
||||
int size = util::encode_utf8(codepoint, bytes);
|
||||
CharT chars[4];
|
||||
|
||||
@ -22,7 +22,7 @@ namespace markdown {
|
||||
Result<wchar_t> process(std::wstring_view source, bool eraseMarkdown);
|
||||
|
||||
template <typename CharT>
|
||||
inline std::basic_string<CharT> escape(std::string_view source) {
|
||||
inline std::basic_string<CharT> escape(std::basic_string_view<CharT> source) {
|
||||
std::basic_stringstream<CharT> ss;
|
||||
int pos = 0;
|
||||
while (pos < source.size()) {
|
||||
|
||||
@ -40,7 +40,7 @@ std::string util::escape(std::string_view s, bool escapeUnicode) {
|
||||
uint cpsize;
|
||||
int codepoint = decode_utf8(cpsize, s.data() + pos);
|
||||
if (escapeUnicode) {
|
||||
ss << "\\u" << std::hex << codepoint;
|
||||
ss << "\\u" << std::setw(4) << std::setfill('0') << std::hex << codepoint;
|
||||
} else {
|
||||
ss << std::string(s.data() + pos, cpsize);
|
||||
}
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
#include "util/stringutil.hpp"
|
||||
#include "coders/BasicParser.hpp"
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
@ -16,6 +17,25 @@ TEST(stringutil, utf8) {
|
||||
EXPECT_EQ(str, str2);
|
||||
}
|
||||
|
||||
/// Builds a wide string of `n` pseudo-random code units for round-trip tests.
///
/// Each element is taken from rand() and masked to its low 16 bits, so the
/// result is reproducible for a given srand() seed and exactly one rand()
/// call is consumed per element, in index order.
///
/// @param n number of code units to generate; zero or a negative value
///          yields an empty string
/// @return string holding max(n, 0) elements, each in the range [0, 0xFFFF]
static std::wstring gen_random_unicode_wstring(int n) {
    // A negative count would be converted to a huge unsigned size by the
    // string constructor/resize and throw instead of producing a string.
    if (n <= 0) {
        return std::wstring();
    }
    std::wstring str(static_cast<std::wstring::size_type>(n), L'\0');
    for (wchar_t& unit : str) {
        // wstring is 16 bit in some systems
        unit = static_cast<wchar_t>(rand() & 0xFFFF);
    }
    return str;
}
|
||||
|
||||
// Round-trips a large pseudo-random wide string through UTF-8 and back and
// expects the result to equal the input.
TEST(stringutil, utf8_random) {
    // Constant seed: the generated input is the same on every run.
    srand(5436324);

    const auto original = gen_random_unicode_wstring(10'000);
    const auto decoded = util::str2wstr_utf8(util::wstr2str_utf8(original));

    EXPECT_EQ(original, decoded);
}
|
||||
|
||||
TEST(stringutil, base64) {
|
||||
srand(2019);
|
||||
for (size_t size = 0; size < 30; size++) {
|
||||
@ -47,3 +67,37 @@ TEST(stringutil, base64_urlsafe) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Small test helper that exposes BasicParser<char>::parseString for a
/// single quoted literal. The base class is inherited privately, so only
/// parse() is reachable from outside.
class StringParser : BasicParser<char> {
public:
    StringParser(std::string_view source)
        : BasicParser("<string>", source) {
    }

    /// Reads the first character of the source, advances `pos` by one and
    /// hands that character to parseString with closeRequired set to true.
    /// @return whatever parseString produces for the remaining input
    std::string parse() {
        const char quote = source[0];
        pos++;
        return parseString(quote, true);
    }
};
|
||||
|
||||
// Checks util::escape with Unicode escaping enabled: first against a fixed
// expected literal, then by parsing an escaped pseudo-random string back
// with StringParser and comparing it to the UTF-8 input.
TEST(stringutil, escape_cases) {
    auto escaped = util::escape("тест5", true);
    auto expected = "\"\\u0442\\u0435\\u0441\\u04425\"";
    ASSERT_EQ(expected, escaped);

    srand(345873458);
    // Skip the first 36 generator outputs before building the input.
    // NOTE(review): presumably this selects a specific input that used to
    // fail — confirm before changing the seed or the skip count.
    for (int i = 0; i < 36; i++) {
        rand();
    }

    auto str = gen_random_unicode_wstring(40);
    auto utf8str = util::wstr2str_utf8(str);
    escaped = util::escape(utf8str, true);

    StringParser parser(escaped);
    auto restored = parser.parse();

    // Diagnostic dump of the bytes that differ. Only the common prefix is
    // walked: indexing `restored` past its end would be undefined behaviour
    // when the parser returns a shorter string, and EXPECT_EQ below still
    // reports any length mismatch.
    for (size_t i = 0; i < utf8str.length() && i < restored.length(); i++) {
        if (utf8str[i] != restored[i]) {
            std::cout << i << ": " << static_cast<int>(utf8str[i]) << " "
                      << static_cast<int>(restored[i]) << std::endl;
        }
    }
    EXPECT_EQ(utf8str, restored);
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user