add YAML parser

This commit is contained in:
MihailRis 2025-03-23 08:49:09 +03:00
parent 239181bb64
commit 2aea19febd
8 changed files with 406 additions and 25 deletions

View File

@ -7,16 +7,20 @@ template <typename CharT>
class BasicParser {
using StringT = std::basic_string<CharT>;
using StringViewT = std::basic_string_view<CharT>;
void skipWhitespaceHashComment(bool newline = true);
protected:
std::string_view filename;
StringViewT source;
uint pos = 0;
uint line = 1;
uint linestart = 0;
bool hashComment = false;
virtual void skipWhitespace();
void skipWhitespace(bool newline = true);
void skip(size_t n);
void skipLine();
void skipEmptyLines();
bool skipTo(const StringT& substring);
void expect(CharT expected);
void expect(const StringT& substring);

View File

@ -31,10 +31,17 @@ namespace {
}
template<typename CharT>
void BasicParser<CharT>::skipWhitespace() {
void BasicParser<CharT>::skipWhitespace(bool newline) {
if (hashComment) {
skipWhitespaceHashComment(newline);
return;
}
while (hasNext()) {
char next = source[pos];
if (next == '\n') {
if (!newline) {
break;
}
line++;
linestart = ++pos;
continue;
@ -47,6 +54,36 @@ void BasicParser<CharT>::skipWhitespace() {
}
}
/// @brief Skip whitespace and '#'-style comments starting at the current position.
/// @param newline when false, scanning stops in front of the first '\n';
/// a '#' comment found on the current line is then consumed with
/// readUntilEOL() and the function returns
template<typename CharT>
void BasicParser<CharT>::skipWhitespaceHashComment(bool newline) {
    for (;;) {
        // Consume a run of whitespace, keeping line/linestart up to date.
        while (hasNext()) {
            const char current = source[pos];
            if (current == '\n') {
                if (!newline) {
                    break;
                }
                ++line;
                ++pos;
                linestart = pos;
            } else if (is_whitespace(current)) {
                ++pos;
            } else {
                break;
            }
        }

        // Anything other than a comment start ends the skip.
        if (!hasNext() || source[pos] != '#') {
            return;
        }
        if (!newline) {
            readUntilEOL();
            return;
        }
        skipLine();

        // Keep going only while more whitespace or another comment follows.
        if (!hasNext()) {
            return;
        }
        if (!is_whitespace(source[pos]) && source[pos] != '#') {
            return;
        }
    }
}
template<typename CharT>
void BasicParser<CharT>::skip(size_t n) {
n = std::min(n, source.length() - pos);
@ -73,6 +110,12 @@ void BasicParser<CharT>::skipLine() {
}
}
/// @brief Skip whitespace (via skipWhitespace()), then move the cursor back to
/// `linestart` so that the indentation of the reached line can still be read.
template<typename CharT>
void BasicParser<CharT>::skipEmptyLines() {
    skipWhitespace();
    // Rewind to the beginning of the line the cursor ended up on.
    pos = linestart;
}
template<typename CharT>
bool BasicParser<CharT>::skipTo(const std::basic_string<CharT>& substring) {
size_t idx = source.find(substring, pos);

View File

@ -13,13 +13,14 @@ using namespace json;
namespace {
class Parser : BasicParser<char> {
dv::value parseList();
dv::value parseObject();
dv::value parseValue();
public:
public:
Parser(std::string_view filename, std::string_view source);
dv::value parse();
private:
dv::value parseList();
dv::value parseObject();
dv::value parseValue();
};
}

View File

@ -16,16 +16,6 @@ using namespace toml;
class TomlReader : BasicParser<char> {
dv::value root;
void skipWhitespace() override {
BasicParser::skipWhitespace();
if (hasNext() && source[pos] == '#') {
skipLine();
if (hasNext() && is_whitespace(peek())) {
skipWhitespace();
}
}
}
// modified version of BaseParser.parseString
// todo: extract common part
std::string parseMultilineString() {
@ -214,6 +204,7 @@ class TomlReader : BasicParser<char> {
public:
TomlReader(std::string_view file, std::string_view source)
: BasicParser(file, source), root(dv::object()) {
hashComment = true;
}
dv::value read() {

318
src/coders/yaml.cpp Normal file
View File

@ -0,0 +1,318 @@
#include "yaml.hpp"
#include "BasicParser.hpp"
using namespace yaml;
namespace {
    /// Chomping mode of a block scalar, consumed by Parser::readMultilineString.
    enum Chomping {
        CLIP,   ///< default mode (no indicator after '|' or '>')
        STRIP,  ///< selected by '-': the resulting string is trimmed
        KEEP    ///< selected by '+': trailing empty lines are re-appended
    };

    /// YAML reader built on top of BasicParser<char>.
    class Parser : BasicParser<char> {
    public:
        Parser(std::string_view filename, std::string_view source);

        /// Parse a value located inside an inline ([...] / {...}) collection.
        dv::value parseValue();

        /// Parse the value that follows "key:" or "- " at the given indent.
        dv::value parseFullValue(int indent);

        /// Parse a block sequence ("- item" lines) at the given indent.
        dv::value parseArray(int indent = 0);

        /// Parse "key: value" lines at the given indent into `object`.
        dv::value parseObject(dv::value&& object, int indent = 0);

        /// Parse an inline sequence: [a, b, ...].
        dv::value parseInlineArray();

        /// Parse an inline mapping: {a: 1, ...}.
        dv::value parseInlineObject();
    private:
        /// Consume leading spaces and return how many were consumed.
        int countIndent();

        /// Consume up to `indent` spaces; true if that many were present.
        bool expectIndent(int indent);

        /// Read a run of identifier characters (see is_yaml_identifier_char).
        std::string_view readYamlIdentifier();

        /// Read the body of a '|' (eols = true) or '>' (eols = false) scalar.
        std::string readMultilineString(int indent, bool eols, Chomping chomp);
    };
}
/// @brief Check whether a character may be part of an unquoted YAML key.
/// @param c character code; values coming from a signed `char` (e.g. UTF-8
/// bytes >= 0x80, which arrive negative) are reduced to their byte value
/// @return true for any byte above 0x20 (space) other than ':'
///
/// The previous threshold was the decimal literal 20, which let the control
/// bytes 21..31 through and rejected every non-ASCII byte on platforms where
/// `char` is signed.
inline bool is_yaml_identifier_char(int c) {
    const auto byte = static_cast<unsigned char>(c);
    // Everything up to and including 0x20 is a control character or space,
    // which covers ' ', '\n', '\r', '\t', '\f' and '\v'.
    return byte > 0x20 && byte != ':';
}
/// @brief Convert an unquoted scalar into a typed value.
/// @param literal scalar text exactly as read from the source
/// @return boolean for true/True/false/False, null for null/Null,
/// otherwise a string copy of the text
///
/// The boolean result used to be computed as `literal[0] == 't'`, which made
/// the capitalised spelling "True" evaluate to false.
static dv::value perform_literal(std::string_view literal) {
    if (literal == "true" || literal == "True") {
        return true;
    }
    if (literal == "false" || literal == "False") {
        return false;
    }
    if (literal == "null" || literal == "Null") {
        return nullptr;
    }
    return std::string(literal);
}
/// Create a parser over `source`; both arguments are forwarded to BasicParser.
Parser::Parser(std::string_view filename, std::string_view source)
    : BasicParser(filename, source) {
    // Make the whitespace skipper treat '#' as a comment start.
    hashComment = true;
}
/// @brief Consume up to `required` leading spaces.
/// @return true when `required` spaces were consumed; on failure the spaces
/// consumed so far stay consumed (the cursor is not restored)
bool Parser::expectIndent(int required) {
    for (int consumed = 0; consumed < required; ++consumed) {
        if (!hasNext() || source[pos] != ' ') {
            return false;
        }
        ++pos;
    }
    return true;
}
/// @brief Read the body of a block scalar.
/// @param indent indentation the body has to exceed
/// @param eols true: lines are joined with '\n'; false: lines are joined with ' '
/// @param chomp STRIP trims the result, KEEP re-appends trailing empty lines,
/// CLIP does neither
/// @return the collected text, with a single '\n' appended before chomping
/// @throws "indentation error" when the first line is not indented deeper than `indent`
std::string Parser::readMultilineString(int indent, bool eols, Chomping chomp) {
// The indentation of the first line defines the indentation of the whole block.
int next_indent = countIndent();
if (next_indent <= indent) {
throw error("indentation error");
}
std::stringstream ss;
ss << readUntilEOL();
// presumably steps over the line terminator left by readUntilEOL — TODO confirm
if (hasNext()) {
skip(1);
}
// Number of empty lines seen since the last content line.
int trailingEmpties = 0;
while (true) {
// Collect consecutive lines that carry the block indentation.
while (expectIndent(next_indent)) {
trailingEmpties = 0;
ss << (eols ? '\n' : ' ');
ss << readUntilEOL();
if (hasNext()) {
skip(1);
}
}
// Step over lines that hold nothing but whitespace, counting them.
while (true) {
skipWhitespace(false);
if (!hasNext() || source[pos] != '\n') {
break;
}
skip(1);
trailingEmpties++;
}
// The block ends at the first line that is not indented deeply enough.
if (!expectIndent(next_indent)) {
break;
}
// More block content follows: rewind to the line start so the inner loop
// can consume the indentation again.
pos = linestart;
}
if (chomp == KEEP) {
// One separator is written per kept empty line, minus one, because a
// final '\n' is appended unconditionally below.
for (int i = 0; i < trailingEmpties - 1; i++) {
ss << (eols ? '\n' : ' ');
}
}
ss << '\n';
// Leave the cursor at the start of the line that terminated the block.
pos = linestart;
auto string = ss.str();
if (chomp == STRIP) {
// presumably strips surrounding whitespace in place — util::trim is not visible here
util::trim(string);
}
return string;
}
/// @brief Read a run of identifier characters starting at the next character.
/// @return view into `source` covering the identifier
/// @throws "identifier expected" when the next character cannot start one
std::string_view Parser::readYamlIdentifier() {
    if (!is_yaml_identifier_char(peek())) {
        throw error("identifier expected");
    }
    const int begin = pos;
    // Advance to the first character that is not part of the identifier.
    for (; hasNext() && is_yaml_identifier_char(source[pos]); ++pos) {
    }
    return source.substr(begin, pos - begin);
}
/// @brief Consume the spaces at the current position.
/// @return number of spaces consumed
int Parser::countIndent() {
    int spaces = 0;
    for (; hasNext() && source[pos] == ' '; ++pos) {
        ++spaces;
    }
    return spaces;
}
/// @brief Parse one value inside an inline collection ([...] or {...}).
/// @return number, string, nested inline collection, or the result of
/// perform_literal() for an unquoted scalar
///
/// Unquoted scalars used to be read with readUntilEOL(), so "[a, b]" consumed
/// ", b]" as part of the first element. Here such a scalar ends at ',', ']',
/// '}', a line break, or a '#' preceded by whitespace, and trailing blanks
/// are dropped. The unreachable trailing `throw` was removed.
dv::value Parser::parseValue() {
    char c = peek();
    if (is_digit(c)) {
        return parseNumber(1);
    }
    if (c == '-' || c == '+') {
        skip(1);
        return parseNumber(c == '-' ? -1 : 1);
    }
    if (c == '"' || c == '\'') {
        skip(1);
        return parseString(c, true);
    }
    if (c == '[') {
        return parseInlineArray();
    }
    if (c == '{') {
        return parseInlineObject();
    }

    // Unquoted scalar: stop in front of the separator so that the enclosing
    // collection still sees ',' / ']' / '}'.
    const size_t start = pos;
    while (hasNext()) {
        const char next = source[pos];
        if (next == ',' || next == ']' || next == '}' ||
            next == '\n' || next == '\r') {
            break;
        }
        if (next == '#' && pos > start && is_whitespace(source[pos - 1])) {
            break;
        }
        pos++;
    }
    std::string_view literal = source.substr(start, pos - start);
    const size_t last = literal.find_last_not_of(" \t");
    literal = last == std::string_view::npos ? literal.substr(0, 0)
                                             : literal.substr(0, last + 1);
    return perform_literal(literal);
}
/// @brief Parse an inline sequence "[a, b, ...]"; the cursor must be at '['.
/// @throws "',' expected" when an element is followed by neither ',' nor ']'
dv::value Parser::parseInlineArray() {
    expect('[');
    auto items = dv::list();
    for (;;) {
        const char head = peek();
        if (head == ']') {
            break;
        }
        if (head == '#') {
            skipLine();
            continue;
        }
        items.add(parseValue());

        const char separator = peek();
        if (separator == ']') {
            break;
        }
        if (separator != ',') {
            throw error("',' expected");
        }
        pos++;
    }
    // Step over the closing ']'.
    pos++;
    return items;
}
/// @brief Parse an inline mapping "{key: value, ...}"; the cursor must be at '{'.
/// @throws "',' expected" when an entry is followed by neither ',' nor '}'
dv::value Parser::parseInlineObject() {
    expect('{');
    dv::value result = dv::object();
    for (;;) {
        const char head = peek();
        if (head == '}') {
            break;
        }
        if (head == '#') {
            skipLine();
            continue;
        }
        auto key = readYamlIdentifier();
        expect(':');
        result[std::string(key)] = parseValue();

        const char separator = peek();
        if (separator == '}') {
            break;
        }
        if (separator != ',') {
            throw error("',' expected");
        }
        pos++;
    }
    // Step over the closing '}'.
    pos++;
    return result;
}
/// @brief Parse the value that follows "key:" or "- ".
/// @param indent indentation of the owning key/item; a nested block found on
/// the following lines must not be indented less than this
/// @return the parsed value; null when the input ends right where the value
/// would start
/// @throws "indentation error" when the nested block is indented less than `indent`
///
/// Every read of `source[pos]` is guarded with hasNext(): indexing the source
/// view at its end is out of range. The unused local `value` was removed.
dv::value Parser::parseFullValue(int indent) {
    if (!hasNext()) {
        // "key:" as the very last characters of the input.
        return nullptr;
    }
    const char c = source[pos];
    if (c == '\n') {
        // The value is a nested block on the following lines.
        skip(1);
        skipEmptyLines();
        const auto init_pos = pos;
        const int next_indent = countIndent();
        if (next_indent < indent) {
            throw error("indentation error");
        }
        const bool isSequence = hasNext() && source[pos] == '-';
        // The block parsers read the indentation themselves.
        pos = init_pos;
        if (isSequence) {
            return parseArray(next_indent);
        }
        return parseObject(dv::object(), next_indent);
    } else if (is_digit(c)) {
        return parseNumber(1);
    } else if (c == '-' || c == '+') {
        skip(1);
        return parseNumber(c == '-' ? -1 : 1);
    } else if (c == '"' || c == '\'') {
        skip(1);
        return parseString(c, true);
    } else if (c == '[') {
        return parseInlineArray();
    } else if (c == '{') {
        return parseInlineObject();
    } else if (c == '|' || c == '>') {
        skip(1);
        // Optional chomping indicator directly after '|' or '>'.
        Chomping chomp = CLIP;
        if (hasNext() && (source[pos] == '-' || source[pos] == '+')) {
            chomp = source[pos] == '-' ? STRIP : KEEP;
            skip(1);
        }
        skipWhitespace(false);
        expectNewLine();
        return readMultilineString(indent, c == '|', chomp);
    } else {
        return perform_literal(readUntilEOL());
    }
}
/// @brief Parse a block sequence made of "- item" lines.
/// @param indent indentation of the sequence; a line indented less ends it
/// @return list of the parsed items
///
/// Changes from the previous version:
///  - blank lines or spaces after the last item no longer reach expect('-')
///    at the end of the input;
///  - a last item without a trailing newline goes through parseFullValue()
///    like every other item, so "- 5" yields a number rather than the string "5".
///
/// NOTE(review): an item is treated as "key: value" whenever a ':' appears
/// before the next line break, which also matches quoted strings that contain
/// ':' — confirm whether this is intended.
dv::value Parser::parseArray(int indent) {
    dv::value list = dv::list();
    while (hasNext()) {
        skipEmptyLines();
        int next_indent = countIndent();
        if (!hasNext()) {
            // Only blank lines / spaces were left.
            break;
        }
        if (next_indent < indent) {
            // The line belongs to an outer block; let the caller re-read it.
            pos = linestart;
            break;
        }
        expect('-');
        skipWhitespace();
        size_t nlpos = source.find('\n', pos);
        size_t colonpos = source.find(':', pos);
        if (nlpos == std::string::npos && colonpos == std::string::npos) {
            // Last line of the input, no "key: value" form.
            if (hasNext()) {
                list.add(parseFullValue(next_indent));
            } else {
                // A bare '-' at the end of input yields an empty literal.
                list.add(perform_literal(readUntilEOL()));
            }
            break;
        }
        if (nlpos < colonpos) {
            // Plain item: no ':' on this line.
            list.add(parseFullValue(next_indent));
            skipLine();
        } else {
            // "- key: value": the item is a mapping started on the dash line.
            auto name = readYamlIdentifier();
            expect(':');
            skipWhitespace(false);
            dv::value object = dv::object();
            object[std::string(name)] = parseFullValue(next_indent);
            skipEmptyLines();
            next_indent = countIndent();
            if (next_indent > indent) {
                // Deeper-indented lines continue the same mapping.
                pos = linestart;
                object = parseObject(std::move(object), next_indent);
            } else {
                pos = linestart;
            }
            list.add(std::move(object));
        }
    }
    return list;
}
/// @brief Parse "key: value" lines into `object`.
/// @param object mapping to fill; it is moved into the return value
/// @param indent indentation of the mapping; a line indented less ends it
/// @return the filled mapping
/// @throws "invalid character" when a line starts with a character that is
/// neither whitespace nor valid in a key
///
/// Changes from the previous version:
///  - `source[pos]` is no longer read when countIndent() has reached the end
///    of the input (e.g. spaces after the last line break);
///  - the rvalue-reference parameter is returned with std::move instead of
///    being copied.
dv::value Parser::parseObject(dv::value&& object, int indent) {
    skipEmptyLines();
    while (hasNext()) {
        size_t prev_pos = pos;
        int next_indent = countIndent();
        if (!hasNext()) {
            // Nothing but indentation was left.
            break;
        }
        if (source[pos] == '\n') {
            // Line holding only spaces.
            skip(1);
            continue;
        }
        if (next_indent < indent) {
            // The line belongs to an outer block; restore the indentation.
            pos = prev_pos;
            break;
        }
        char c = peek();
        if (!is_yaml_identifier_char(c)) {
            if (!is_whitespace(c)) {
                throw error("invalid character");
            }
            continue;
        }
        auto name = readYamlIdentifier();
        expect(':');
        skipWhitespace(false);
        object[std::string(name)] = parseFullValue(indent);
        skipEmptyLines();
    }
    return std::move(object);
}
/// @brief Parse YAML text whose root is read as a mapping.
/// @param filename name handed to the parser together with the text
/// @param source YAML text
dv::value yaml::parse(std::string_view filename, std::string_view source) {
    Parser parser(filename, source);
    return parser.parseObject(dv::object());
}
/// @brief Parse YAML text that has no file behind it; "[string]" is used as
/// the file name.
dv::value yaml::parse(std::string_view source) {
    constexpr std::string_view placeholderName = "[string]";
    return parse(placeholderName, source);
}

10
src/coders/yaml.hpp Normal file
View File

@ -0,0 +1,10 @@
#pragma once
#include <string>
#include "data/dv.hpp"
namespace yaml {
    /// @brief Parse YAML text into a dv::value.
    /// @param filename name associated with the text
    /// @param source YAML text
    dv::value parse(std::string_view filename, std::string_view source);

    /// @brief Parse YAML text that is not associated with a file.
    /// @param source YAML text
    dv::value parse(std::string_view source);
}

View File

@ -40,18 +40,10 @@ const std::string& langs::Lang::getId() const {
/// @brief Language key-value txt files parser
namespace {
class Reader : BasicParser<char> {
void skipWhitespace() override {
BasicParser::skipWhitespace();
if (hasNext() && source[pos] == '#') {
skipLine();
if (hasNext() && is_whitespace(peek())) {
skipWhitespace();
}
}
}
public:
Reader(std::string_view file, std::string_view source)
: BasicParser(file, source) {
hashComment = true;
}
void read(langs::Lang& lang, const std::string &prefix) {

22
test/coders/yaml.cpp Normal file
View File

@ -0,0 +1,22 @@
#include <gtest/gtest.h>
#include "coders/yaml.hpp"
#include "coders/json.hpp"
#include "coders/commons.hpp"
#include "io/io.hpp"
#include "io/devices/StdfsDevice.hpp"
namespace fs = std::filesystem;
// Parses a workflow file from the repository and prints it as JSON.
// The test fails only when the parser throws.
TEST(YAML, EncodeDecode) {
    io::set_device("root", std::make_shared<io::StdfsDevice>(fs::u8path("../../")));
    auto filename = "root:.github/workflows/windows-clang.yml";
    try {
        auto value = yaml::parse(io::read_string(filename));
        std::cout << json::stringify(value, true) << std::endl;
    } catch (const parsing_error& error) {
        std::cerr << error.errorLog() << std::endl;
        // Rethrow the original exception object; `throw error;` would throw
        // a copy whose type is exactly parsing_error.
        throw;
    }
}