#include "BasicParser.hpp"

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <sstream>
#include <stdexcept>
#include <string>

#include "util/stringutil.hpp"

namespace {
    /// Maps the character that follows a leading '0' in a numeric literal
    /// to a radix: b/B -> 2, o/O -> 8, x/X -> 16, anything else -> 10.
    inline int is_box(int c) {
        switch (c) {
            case 'B':
            case 'b':
                return 2;
            case 'O':
            case 'o':
                return 8;
            case 'X':
            case 'x':
                return 16;
        }
        return 10;
    }

    /// Absolute value of a signed 64-bit integer as unsigned
    /// (well-defined for INT64_MIN as well).
    inline uint64_t magnitude(int64_t v) {
        return v < 0 ? 0 - static_cast<uint64_t>(v) : static_cast<uint64_t>(v);
    }

    /// base^exponent by repeated multiplication.
    /// Stops early once the product saturates (infinity/NaN or zero), because
    /// further multiplications cannot change it; this bounds the loop for
    /// absurdly large exponents.
    inline double unsigned_power(double base, uint64_t exponent) {
        double result = 1.0;
        for (uint64_t i = 0;
             i < exponent && std::isfinite(result) && result != 0.0;
             i++) {
            result *= base;
        }
        return result;
    }

    /// base^exponent for integer exponents.
    /// Negative exponents yield the reciprocal of the positive power
    /// (previously they silently produced 1.0).
    inline double power(double base, int64_t exponent) {
        const double result = unsigned_power(base, magnitude(exponent));
        return exponent < 0 ? 1.0 / result : result;
    }

    /// mantissa * 10^exponent.
    /// For negative exponents the mantissa is divided by the positive power
    /// of ten instead of being multiplied by a rounded reciprocal, which
    /// avoids one extra rounding step (e.g. 3e-1 -> 0.3).
    inline double scale_by_pow10(double mantissa, int64_t exponent) {
        const double factor = unsigned_power(10.0, magnitude(exponent));
        return exponent < 0 ? mantissa / factor : mantissa * factor;
    }
}

/// Advances past whitespace starting at the current position.
/// @param newline when false, stops at the first '\n' without consuming it;
/// when true, consumes line breaks and updates line / linestart.
/// If the hashComment flag is set, delegates to skipWhitespaceHashComment.
template <typename CharT>
void BasicParser<CharT>::skipWhitespace(bool newline) {
    if (hashComment) {
        skipWhitespaceHashComment(newline);
        return;
    }
    while (hasNext()) {
        // CharT (not char): narrowing a wide character could make an
        // unrelated code unit compare equal to '\n' or whitespace.
        CharT next = source[pos];
        if (next == '\n') {
            if (!newline) {
                break;
            }
            line++;
            linestart = ++pos;
            continue;
        }
        if (is_whitespace(next)) {
            pos++;
        } else {
            break;
        }
    }
}

/// Same as skipWhitespace, but additionally treats '#' as the start of a
/// comment running to the end of the line.
/// @param newline when false, a comment is consumed only up to (not
/// including) the line break; when true, whole comment lines are skipped
/// and scanning continues on the following lines.
template <typename CharT>
void BasicParser<CharT>::skipWhitespaceHashComment(bool newline) {
    while (hasNext()) {
        CharT next = source[pos];
        if (next == '\n') {
            if (!newline) {
                break;
            }
            line++;
            linestart = ++pos;
            continue;
        }
        if (is_whitespace(next)) {
            pos++;
        } else {
            break;
        }
    }
    if (hasNext() && source[pos] == '#') {
        if (!newline) {
            readUntilEOL();
            return;
        }
        skipLine();
        // Keep going while the next line starts with whitespace or
        // another comment.
        if (hasNext() && (is_whitespace(source[pos]) || source[pos] == '#')) {
            skipWhitespaceHashComment(newline);
        }
    }
}

/// Advances by up to n characters (clamped to the remaining input),
/// updating line / linestart for every '\n' passed.
template <typename CharT>
void BasicParser<CharT>::skip(size_t n) {
    n = std::min<size_t>(n, source.length() - pos);

    for (size_t i = 0; i < n; i++) {
        CharT next = source[pos++];
        if (next == '\n') {
            line++;
            linestart = pos;
        }
    }
}

/// Advances to the character just after the next '\n', or to the end of
/// the input if there is no further line break.
template <typename CharT>
void BasicParser<CharT>::skipLine() {
    while (hasNext()) {
        if (source[pos] == '\n') {
            pos++;
            linestart = pos;
            line++;
            break;
        }
        pos++;
    }
}

/// Skips whitespace, then rewinds the position to the start of the line
/// on which the scan stopped.
template <typename CharT>
void BasicParser<CharT>::skipEmptyLines() {
    skipWhitespace();
    pos = linestart;
}

/// Advances to the next occurrence of substring.
/// @return true when found (position is at its first character); false
/// when not found (position is at the end of the input).
template <typename CharT>
bool BasicParser<CharT>::skipTo(const std::basic_string<CharT>& substring) {
    size_t idx = source.find(substring, pos);
    if (idx == std::string::npos) {
        skip(source.length() - pos);
        return false;
    } else {
        skip(idx - pos);
        return true;
    }
}

/// @return true while the position is before the end of the input.
template <typename CharT>
bool BasicParser<CharT>::hasNext() {
    return pos < source.length();
}

/// @return number of characters between the position and the end of input.
template <typename CharT>
size_t BasicParser<CharT>::remain() const {
    return source.length() - pos;
}

/// @return true if the input at the current position starts with substring.
/// Does not move the position.
template <typename CharT>
bool BasicParser<CharT>::isNext(const std::basic_string<CharT>& substring) {
    if (source.length() - pos < substring.length()) {
        return false;
    }
    return source.substr(pos, substring.length()) == substring;
}

/// Returns the character at the position and advances past it.
/// @throws parsing_error ("unexpected end") at the end of the input.
template <typename CharT>
CharT BasicParser<CharT>::nextChar() {
    if (!hasNext()) {
        throw error("unexpected end");
    }
    return source[pos++];
}

/// Skips whitespace (via peek) and consumes the given character.
/// @throws parsing_error if the next non-whitespace character differs.
template <typename CharT>
void BasicParser<CharT>::expect(CharT expected) {
    CharT c = peek();
    if (c != expected) {
        throw error(
            "'" + std::string({static_cast<char>(expected)}) + "' expected"
        );
    }
    pos++;
}

/// Consumes substring at the current position (no whitespace skipping).
/// An empty substring is a no-op.
/// @throws parsing_error if the input does not match.
template <typename CharT>
void BasicParser<CharT>::expect(const std::basic_string<CharT>& substring) {
    if (substring.empty()) return;
    for (size_t i = 0; i < substring.length(); i++) {
        if (source.length() <= pos + i || source[pos + i] != substring[i]) {
            throw error(
                util::quote(util::str2str_utf8(substring)) + " expected"
            );
        }
    }
    pos += substring.length();
}

/// Consumes whitespace up to and including the next '\n'.
/// Reaching the end of the input without a line break is accepted.
/// @throws parsing_error if a non-whitespace character comes first.
template <typename CharT>
void BasicParser<CharT>::expectNewLine() {
    while (hasNext()) {
        CharT next = source[pos];
        if (next == '\n') {
            line++;
            linestart = ++pos;
            return;
        }
        if (is_whitespace(next)) {
            pos++;
        } else {
            throw error("line separator expected");
        }
    }
}

/// Moves the position back by count characters.
/// NOTE(review): line / linestart are not adjusted here — confirm callers
/// never step back across a line break.
/// @throws std::runtime_error if count exceeds the current position.
template <typename CharT>
void BasicParser<CharT>::goBack(size_t count) {
    if (pos < count) {
        throw std::runtime_error("pos < jump");
    }
    if (pos) {
        pos -= count;
    }
}

/// Rewinds the position to the start of the input.
/// NOTE(review): line / linestart are left untouched — confirm whether
/// they should be reset as well.
template <typename CharT>
void BasicParser<CharT>::reset() {
    pos = 0;
}

/// Skips whitespace within the current line and returns the character at
/// the resulting position without consuming it; a '\n' is returned as-is.
/// @throws parsing_error ("unexpected end") at the end of the input.
template <typename CharT>
CharT BasicParser<CharT>::peekInLine() {
    while (hasNext()) {
        CharT next = source[pos];
        if (next == '\n') {
            return next;
        }
        if (is_whitespace(next)) {
            pos++;
        } else {
            break;
        }
    }
    if (pos >= source.length()) {
        throw error("unexpected end");
    }
    return source[pos];
}

/// Skips whitespace (see skipWhitespace) and returns the next character
/// without consuming it.
/// @throws parsing_error ("unexpected end") at the end of the input.
template <typename CharT>
CharT BasicParser<CharT>::peek() {
    skipWhitespace();
    if (pos >= source.length()) {
        throw error("unexpected end");
    }
    return source[pos];
}

/// Returns the character at the position without skipping anything.
/// @throws parsing_error ("unexpected end") at the end of the input.
template <typename CharT>
CharT BasicParser<CharT>::peekNoJump() {
    if (pos >= source.length()) {
        throw error("unexpected end");
    }
    return source[pos];
}

/// Reads characters up to (not including) the next occurrence of c, or to
/// the end of the input. The returned view refers to the source buffer.
template <typename CharT>
std::basic_string_view<CharT> BasicParser<CharT>::readUntil(CharT c) {
    // size_t rather than int: avoids narrowing the offset.
    size_t start = pos;
    while (hasNext() && source[pos] != c) {
        pos++;
    }
    return source.substr(start, pos - start);
}

/// Reads characters up to (not including) the next occurrence of s.
/// @param nothrow when true and s is absent, the rest of the input is
/// returned and the position moves to the end.
/// @throws parsing_error if s is absent and nothrow is false.
template <typename CharT>
std::basic_string_view<CharT> BasicParser<CharT>::readUntil(
    std::basic_string_view<CharT> s, bool nothrow
) {
    size_t start = pos;
    size_t found = source.find(s, pos);
    if (found == std::string::npos) {
        if (nothrow) {
            // Go through skip() so that line / linestart stay in sync,
            // the same way the "found" branch and skipTo() do.
            skip(source.size() - pos);
            return source.substr(start);
        }
        throw error(util::quote(util::str2str_utf8(s)) + " expected");
    }
    skip(found - pos);
    return source.substr(start, pos - start);
}

/// Reads characters up to (not including) the next whitespace character
/// or the end of the input.
template <typename CharT>
std::basic_string_view<CharT> BasicParser<CharT>::readUntilWhitespace() {
    size_t start = pos;
    while (hasNext() && !is_whitespace(source[pos])) {
        pos++;
    }
    return source.substr(start, pos - start);
}

/// Reads the rest of the current line. The position stops at the '\n'
/// (not consumed); a '\r' directly before it is excluded from the result.
template <typename CharT>
std::basic_string_view<CharT> BasicParser<CharT>::readUntilEOL() {
    size_t start = pos;
    while (hasNext() && source[pos] != '\n') {
        pos++;
    }
    if (pos > start && source[pos - 1] == '\r') {
        return source.substr(start, pos - start - 1);
    }
    return source.substr(start, pos - start);
}

/// Skips whitespace and reads an identifier accepted by
/// is_identifier_start / is_identifier_part.
/// @throws parsing_error ("identifier expected") otherwise.
template <typename CharT>
std::basic_string<CharT> BasicParser<CharT>::parseName() {
    CharT c = peek();
    if (!is_identifier_start(c)) {
        throw error("identifier expected");
    }
    size_t start = pos;
    while (hasNext() && is_identifier_part(source[pos])) {
        pos++;
    }
    return std::basic_string<CharT>(source.substr(start, pos - start));
}

/// Skips whitespace and reads a name accepted by
/// is_json_identifier_start / is_json_identifier_part.
/// @throws parsing_error ("identifier expected") otherwise.
template <typename CharT>
std::basic_string<CharT> BasicParser<CharT>::parseXmlName() {
    CharT c = peek();
    if (!is_json_identifier_start(c)) {
        throw error("identifier expected");
    }
    size_t start = pos;
    while (hasNext() && is_json_identifier_part(source[pos])) {
        pos++;
    }
    return std::basic_string<CharT>(source.substr(start, pos - start));
}

/// Skips whitespace and reads an unsigned integer in the given base.
/// '_' characters after the first digit are ignored as separators.
/// Parsing stops at the first character that is not a digit of the base.
/// @throws parsing_error ("invalid number literal") if the first character
/// is not a valid digit.
template <typename CharT>
int64_t BasicParser<CharT>::parseSimpleInt(int base) {
    CharT c = peek();
    int index = hexchar2int(c);
    if (index == -1 || index >= base) {
        throw error("invalid number literal");
    }
    int64_t value = index;
    pos++;
    while (hasNext()) {
        c = source[pos];
        if (c == '_') {
            // Re-check hasNext() after every separator: a trailing '_'
            // must not read past the end of the input.
            pos++;
            continue;
        }
        index = hexchar2int(c);
        if (index == -1 || index >= base) {
            return value;
        }
        value *= base;
        value += index;
        pos++;
    }
    return value;
}

/// Parses a number with an optional leading '+' or '-'.
template <typename CharT>
dv::value BasicParser<CharT>::parseNumber() {
    switch (peek()) {
        case '-':
            skip(1);
            return parseNumber(-1);
        case '+':
            skip(1);
            return parseNumber(1);
        default:
            return parseNumber(1);
    }
}

/// Parses an unsigned numeric literal and applies the given sign.
/// Accepted forms: 0b / 0o / 0x prefixed integers, "inf", "nan", decimal
/// integers, and decimals with an optional fraction and/or exponent.
/// @param sign multiplier applied to the result (1 or -1 from parseNumber()).
/// @return integer value for integer literals, floating-point otherwise.
template <typename CharT>
dv::value BasicParser<CharT>::parseNumber(int sign) {
    CharT c = peek();
    int base = 10;
    if (c == '0' && pos + 1 < source.length() &&
        (base = is_box(source[pos + 1])) != 10) {
        pos += 2;
        // The sign applies to prefixed literals too (-0x10 == -16).
        return sign * parseSimpleInt(base);
    } else if (c == 'i' && pos + 2 < source.length() &&
               source[pos + 1] == 'n' && source[pos + 2] == 'f') {
        pos += 3;
        return INFINITY * sign;
    } else if (c == 'n' && pos + 2 < source.length() &&
               source[pos + 1] == 'a' && source[pos + 2] == 'n') {
        pos += 3;
        return NAN * sign;
    }

    // Reads the optional sign and the digits that follow 'e' / 'E'.
    const auto parseExponent = [this]() -> int64_t {
        int64_t exponentSign = 1;
        const CharT mark = this->peek();
        if (mark == '-') {
            exponentSign = -1;
            this->pos++;
        } else if (mark == '+') {
            this->pos++;
        }
        return exponentSign * this->parseSimpleInt(10);
    };

    int64_t value = parseSimpleInt(base);
    if (!hasNext()) {
        return value * sign;
    }
    c = source[pos];
    if (c == 'e' || c == 'E') {
        pos++;
        return scale_by_pow10(
            static_cast<double>(sign * value), parseExponent()
        );
    }
    if (c == '.') {
        pos++;
        // divisor = 10^(number of fraction digits). Kept as double so that
        // long runs of zeros cannot overflow an integer.
        double divisor = 1.0;
        while (hasNext() && source[pos] == '0') {
            divisor *= 10.0;
            pos++;
        }
        int64_t afterdot = 0;
        if (hasNext() && is_digit(source[pos])) {
            afterdot = parseSimpleInt(10);
        }
        // Count the digits of afterdot with integer arithmetic; log10(0)
        // is -infinity and cannot be converted to an integer.
        for (int64_t rest = afterdot; rest > 0; rest /= 10) {
            divisor *= 10.0;
        }
        const double dvalue = value + afterdot / divisor;

        // The literal may end exactly at the end of the input.
        if (hasNext() && (source[pos] == 'e' || source[pos] == 'E')) {
            pos++;
            return scale_by_pow10(sign * dvalue, parseExponent());
        }
        return sign * dvalue;
    }
    return sign * value;
}

/// Reads string contents up to the closing quote; the opening quote is
/// expected to be consumed already. Handles backslash escapes: octal
/// (\NNN), \u / \x hexadecimal code points (written as UTF-8 bytes),
/// n r b t f v ' " \ / and an escaped line break (dropped).
/// NOTE(review): numeric escapes consume as many digits as parseSimpleInt
/// accepts, not a fixed count — confirm this is intended.
/// @param quote closing quote character.
/// @param closeRequired when true, an unescaped line break or the end of
/// the input before the closing quote is an error; when false, the text
/// read so far is returned at the end of the input.
/// @throws parsing_error on an illegal escape or an unterminated literal.
template <typename CharT>
std::basic_string<CharT> BasicParser<CharT>::parseString(
    CharT quote, bool closeRequired
) {
    std::basic_stringstream<CharT> ss;
    while (hasNext()) {
        CharT c = source[pos];
        if (c == quote) {
            pos++;
            return ss.str();
        }
        if (c == '\\') {
            pos++;
            c = nextChar();
            if (c >= '0' && c <= '7') {
                // Step back so the first octal digit is parsed as well.
                pos--;
                ss << static_cast<char>(parseSimpleInt(8));
                continue;
            }
            if (c == 'u' || c == 'x') {
                int codepoint = parseSimpleInt(16);
                // Zero-initialized: only `size` bytes are produced by the
                // encoder, but all four are copied below.
                ubyte bytes[4] {};
                int size = util::encode_utf8(codepoint, bytes);
                CharT chars[4];
                for (int i = 0; i < 4; i++) {
                    chars[i] = bytes[i];
                }
                ss.write(chars, size);
                continue;
            }
            switch (c) {
                case 'n': ss << '\n'; break;
                case 'r': ss << '\r'; break;
                case 'b': ss << '\b'; break;
                case 't': ss << '\t'; break;
                case 'f': ss << '\f'; break;
                case 'v': ss << '\v'; break;
                case '\'': ss << '\''; break;
                case '"': ss << '"'; break;
                case '\\': ss << '\\'; break;
                case '/': ss << '/'; break;
                case '\n': continue;
                default:
                    throw error(
                        "'\\" +
                        util::str2str_utf8(std::basic_string<CharT>({c})) +
                        "' is an illegal escape"
                    );
            }
            continue;
        }
        if (c == '\n' && closeRequired) {
            throw error("non-closed string literal");
        }
        ss << c;
        pos++;
    }
    if (closeRequired) {
        throw error("unexpected end");
    }
    return ss.str();
}

/// Builds a parsing_error for the current position (narrow-character
/// source: offsets are passed through unchanged).
template <>
inline parsing_error BasicParser<char>::error(const std::string& message) {
    return parsing_error(message, filename, source, pos, line, linestart);
}

/// Builds a parsing_error for the current position (wide-character
/// source). The source is converted to UTF-8, so the position and the
/// line start are converted to UTF-8 lengths of the corresponding prefixes.
template <>
inline parsing_error BasicParser<wchar_t>::error(const std::string& message) {
    size_t utf8pos = util::length_utf8(source.substr(0, pos));
    // Measure the prefix before the line start directly; substr takes a
    // count (not an end index), so substr(linestart, pos) was wrong.
    size_t utf8linestart = util::length_utf8(source.substr(0, linestart));
    return parsing_error(
        message,
        filename,
        util::str2str_utf8(source),
        utf8pos,
        line,
        utf8linestart
    );
}