diff --git a/src/coders/commons.cpp b/src/coders/commons.cpp
index 82033cd5..422159ba 100644
--- a/src/coders/commons.cpp
+++ b/src/coders/commons.cpp
@@ -98,6 +98,20 @@ void BasicParser::skipWhitespace() {
     }
 }
 
+// Advance the cursor by up to n characters (clamped to the end of source),
+// bumping line and linestart for every '\n' that is consumed.
+void BasicParser::skip(size_t n) {
+    n = std::min(n, source.length()-pos);
+
+    for (size_t i = 0; i < n; i++) {
+        char next = source[pos++];
+        if (next == '\n') {
+            line++;
+            linestart = pos;
+        }
+    }
+}
+
 void BasicParser::skipLine() {
     while (hasNext()) {
         if (source[pos] == '\n') {
@@ -110,10 +124,33 @@ void BasicParser::skipLine() {
     }
 }
 
+// Advance to the next occurrence of substring at or after pos.
+// Returns true with pos on the first character of the match, or false with
+// pos at the end of source when there is no match.
+bool BasicParser::skipTo(const std::string& substring) {
+    size_t idx = source.find(substring, pos);
+    if (idx == std::string::npos) {
+        skip(source.length()-pos);
+        return false;
+    } else {
+        skip(idx-pos);
+        return true;
+    }
+}
+
 bool BasicParser::hasNext() {
     return pos < source.length();
 }
 
+// Report, without consuming anything, whether source continues with substring at pos.
+bool BasicParser::isNext(const std::string& substring) {
+    if (source.length() - pos < substring.length()) {
+        return false;
+    }
+    // compare() checks in place; substr() would allocate a temporary string per call.
+    return source.compare(pos, substring.length(), substring) == 0;
+}
+
 char BasicParser::nextChar() {
     if (!hasNext()) {
         throw error("unexpected end");
@@ -129,6 +166,19 @@ void BasicParser::expect(char expected) {
     pos++;
 }
 
+// Consume substring at pos, or throw error(...) if source does not continue with it.
+// An empty substring is accepted and consumes nothing.
+void BasicParser::expect(const std::string& substring) {
+    if (substring.empty())
+        return;
+    for (size_t i = 0; i < substring.length(); i++) {
+        if (source.length() <= pos + i || source[pos+i] != substring[i]) {
+            throw error(escape_string(substring)+" expected");
+        }
+    }
+    pos += substring.length();
+}
+
 void BasicParser::expectNewLine() {
     while (hasNext()) {
         char next = source[pos];
@@ -145,6 +195,11 @@ void BasicParser::expectNewLine() {
     }
 }
 
+// Step the cursor back by one character; does nothing when pos is already 0.
+void BasicParser::goBack() {
+    if (pos) pos--;
+}
+
 char BasicParser::peek() {
     skipWhitespace();
     if (pos >= source.length()) {
diff --git a/src/coders/commons.h b/src/coders/commons.h
index 6c225309..431f247d 100644
--- a/src/coders/commons.h
+++ b/src/coders/commons.h
@@ -70,12 +70,17 @@ protected:
     uint linestart = 0;
 
     virtual void skipWhitespace();
+
void skip(size_t n); void skipLine(); + bool skipTo(const std::string& substring); void expect(char expected); + void expect(const std::string& substring); char peek(); char nextChar(); bool hasNext(); + bool isNext(const std::string& substring); void expectNewLine(); + void goBack(); std::string parseName(); int64_t parseSimpleInt(int base); diff --git a/src/coders/xml.cpp b/src/coders/xml.cpp new file mode 100644 index 00000000..54d97c2c --- /dev/null +++ b/src/coders/xml.cpp @@ -0,0 +1,328 @@ +#include "xml.h" + +#include +#include +#include "../util/stringutil.h" + +using namespace xml; + +Attribute::Attribute(std::string name, std::string text) + : name(name), + text(text) { +} + +const std::string& Attribute::getName() const { + return name; +} + +const std::string& Attribute::getText() const { + return text; +} + +int64_t Attribute::asInt() const { + return std::stoll(text); +} + +double Attribute::asFloat() const { + return std::stod(text); +} + +bool Attribute::asBool() const { + return text == "true" || text == "1"; +} + +Node::Node(std::string tag) : tag(tag) { +} + +void Node::add(xmlelement element) { + elements.push_back(element); +} + +void Node::set(std::string name, std::string text) { + attrs.insert_or_assign(name, Attribute(name, text)); +} + +const std::string& Node::getTag() const { + return tag; +} + +const xmlattribute Node::attr(const std::string& name) const { + auto found = attrs.find(name); + if (found == attrs.end()) { + throw std::runtime_error("element <"+tag+" ...> missing attribute "+name); + } + return found->second; +} + +const xmlattribute Node::attr(const std::string& name, const std::string& def) const { + auto found = attrs.find(name); + if (found == attrs.end()) { + return Attribute(name, def); + } + return found->second; +} + +bool Node::has(const std::string& name) const { + auto found = attrs.find(name); + return found != attrs.end(); +} + +xmlelement Node::sub(size_t index) { + return elements.at(index); +} + +size_t 
Node::size() const { + return elements.size(); +} + +const std::vector& Node::getElements() const { + return elements; +} + +const xmlelements_map& Node::getAttributes() const { + return attrs; +} + +Document::Document(std::string version, std::string encoding) + : version(version), + encoding(encoding) { +} + +void Document::setRoot(xmlelement element) { + this->root = element; +} + +xmlelement Document::getRoot() const { + return root; +} + +const std::string& Document::getVersion() const { + return version; +} + +const std::string& Document::getEncoding() const { + return encoding; +} + +Parser::Parser(std::string filename, std::string source) + : BasicParser(filename, source) { +} + +xmlelement Parser::parseOpenTag() { + std::string tag = parseName(); + auto node = std::make_shared(tag); + + char c; + while (true) { + skipWhitespace(); + c = peek(); + if (c == '/' || c == '>' || c == '?') + break; + std::string attrname = parseName(); + std::string attrtext = ""; + skipWhitespace(); + if (peek() == '=') { + nextChar(); + skipWhitespace(); + expect('"'); + attrtext = parseString('"'); + } + node->set(attrname, attrtext); + } + return node; +} + +void Parser::parseDeclaration() { + std::string version = "1.0"; + std::string encoding = "UTF-8"; + expect('<'); + if (peek() == '?') { + nextChar(); + xmlelement node = parseOpenTag(); + expect("?>"); + if (node->getTag() != "xml") { + throw error("invalid declaration"); + } + version = node->attr("version", version).getText(); + encoding = node->attr("encoding", encoding).getText(); + if (encoding != "utf-8" && encoding != "UTF-8") { + throw error("UTF-8 encoding is only supported"); + } + } else { + goBack(); + } + document = std::make_shared(version, encoding); +} + +void Parser::parseComment() { + expect("!--"); + if (skipTo("-->")) { + skip(3); + } else { + throw error("comment close missing"); + } +} + +std::string Parser::parseText() { + size_t start = pos; + while (hasNext()) { + char c = peek(); + if (c == 
'<') { + break; + } + nextChar(); + } + return source.substr(start, pos-start); +} + +xmlelement Parser::parseElement() { + // text element + if (peek() != '<') { + auto element = std::make_shared("#"); + auto text = parseText(); + util::replaceAll(text, """, "\""); + util::replaceAll(text, "'", "'"); + util::replaceAll(text, "<", "<"); + util::replaceAll(text, ">", ">"); + util::replaceAll(text, "&", "&"); + element->set("#", text); + return element; + } + nextChar(); + + // + if (peek() == '!') { + if (isNext("!DOCTYPE ")) { + throw error("XML DTD is not supported yet"); + } + parseComment(); + return nullptr; + } + + auto element = parseOpenTag(); + char c = nextChar(); + + // + if (c == '/') { + expect('>'); + } + // ... + else if (c == '>') { + skipWhitespace(); + while (!isNext("add(sub); + } + skipWhitespace(); + } + skip(2); + expect(element->getTag()); + expect('>'); + } + // + else { + throw error("invalid syntax"); + } + return element; +} + +xmldocument Parser::parse() { + parseDeclaration(); + document->setRoot(parseElement()); + return document; +} + +xmldocument xml::parse(std::string filename, std::string source) { + Parser parser(filename, source); + return parser.parse(); +} + +inline void newline( + std::stringstream& ss, + bool nice, + const std::string& indentStr, + int indent +) { + if (!nice) + return; + ss << '\n'; + for (int i = 0; i < indent; i++) { + ss << indentStr; + } +} + +static void stringifyElement( + std::stringstream& ss, + const xmlelement element, + bool nice, + const std::string& indentStr, + int indent +) { + if (element->isText()) { + std::string text = element->attr("#").getText(); + util::replaceAll(text, "&", "&"); + util::replaceAll(text, "\"","""); + util::replaceAll(text, "'", "'"); + util::replaceAll(text, "<", "<"); + util::replaceAll(text, ">", ">"); + ss << text; + return; + } + const std::string& tag = element->getTag(); + + ss << '<' << tag; + auto& attrs = element->getAttributes(); + if (!attrs.empty()) { + ss 
<< ' '; + int count = 0; + for (auto& entry : attrs) { + auto attr = entry.second; + ss << attr.getName(); + if (!attr.getText().empty()) { + ss << "=" << escape_string(attr.getText()); + } + if (count + 1 < int(attrs.size())) { + ss << " "; + } + count++; + } + } + auto& elements = element->getElements(); + if (elements.size() == 1 && elements[0]->isText()) { + ss << ">"; + stringifyElement(ss, elements[0], nice, indentStr, indent+1); + ss << ""; + return; + } + if (!elements.empty()) { + ss << '>'; + for (auto& sub : elements) { + newline(ss, nice, indentStr, indent+1); + stringifyElement(ss, sub, nice, indentStr, indent+1); + } + newline(ss, nice, indentStr, indent); + ss << ""; + + } else { + ss << "/>"; + } + +} + +std::string xml::stringify( + const xmldocument document, + bool nice, + const std::string& indentStr +) { + std::stringstream ss; + + // XML declaration + ss << "getVersion(); + ss << "\" encoding=\"UTF-8\" ?>"; + newline(ss, nice, indentStr, 0); + + stringifyElement(ss, document->getRoot(), nice, indentStr, 0); + + return ss.str(); +} diff --git a/src/coders/xml.h b/src/coders/xml.h new file mode 100644 index 00000000..29c5a919 --- /dev/null +++ b/src/coders/xml.h @@ -0,0 +1,132 @@ +#ifndef CODERS_XML_H_ +#define CODERS_XML_H_ + +#include +#include +#include +#include + +#include "commons.h" + +namespace xml { + class Node; + class Attribute; + class Document; + + typedef Attribute xmlattribute; + typedef std::shared_ptr xmlelement; + typedef std::shared_ptr xmldocument; + typedef std::unordered_map xmlelements_map; + + class Attribute { + std::string name; + std::string text; + public: + Attribute(std::string name, std::string text); + + const std::string& getName() const; + const std::string& getText() const; + int64_t asInt() const; + double asFloat() const; + bool asBool() const; + }; + + /* XML element class. 
Text element has tag '#' and a single attribute '#' holding its text */
+    class Node {
+        std::string tag;
+        std::unordered_map<std::string, xmlattribute> attrs;
+        std::vector<xmlelement> elements;
+    public:
+        Node(std::string tag);
+
+        /* Add sub-element */
+        void add(xmlelement element);
+
+        /* Set attribute value. Creates the attribute if it does not exist */
+        void set(std::string name, std::string text);
+
+        /* Get element tag */
+        const std::string& getTag() const;
+
+        inline bool isText() const { /* text elements carry the reserved tag "#" */
+            return getTag() == "#";
+        }
+
+        inline std::string text() const { /* by value: attr() returns a temporary, a reference into it would dangle */
+            return attr("#").getText();
+        }
+
+        /* Get attribute by name
+        @param name attribute name
+        @throws std::runtime_error if element has no attribute
+        @return xmlattribute - {name, value} */
+        const xmlattribute attr(const std::string& name) const;
+        /* Get attribute by name
+        @param name attribute name
+        @param def default value, returned wrapped in xmlattribute
+        if element has no attribute
+        @return xmlattribute - {name, value} or {name, def} if not found */
+        const xmlattribute attr(const std::string& name, const std::string& def) const;
+
+        /* Check if element has attribute
+        @param name attribute name */
+        bool has(const std::string& name) const;
+
+        /* Get sub-element by index
+        @throws std::out_of_range if an invalid index is given */
+        xmlelement sub(size_t index);
+
+        /* Get number of sub-elements */
+        size_t size() const;
+
+        const std::vector<xmlelement>& getElements() const;
+        const xmlelements_map& getAttributes() const;
+    };
+
+    class Document { /* root element plus the version/encoding of the XML declaration */
+        xmlelement root = nullptr;
+        std::string version;
+        std::string encoding;
+    public:
+        Document(std::string version, std::string encoding);
+
+        void setRoot(xmlelement element);
+        xmlelement getRoot() const;
+
+        const std::string& getVersion() const;
+        const std::string& getEncoding() const;
+    };
+
+    class Parser : public BasicParser { /* XML reader built on BasicParser; parse() returns the document */
+        xmldocument document;
+
+        xmlelement parseOpenTag();
+        xmlelement parseElement();
+        void parseDeclaration();
+        void parseComment();
+        std::string parseText();
+    public:
+
Parser(std::string filename, std::string source);
+
+        xmldocument parse();
+    };
+
+    /* Serialize XML Document to string
+       @param document serializing document
+       @param nice use human readable format
+       (with indents and line-separators)
+       @param indentStr indentation characters sequence
+       (default - 4 spaces)*/
+    extern std::string stringify(
+        const xmldocument document,
+        bool nice=true,
+        const std::string& indentStr=" "
+    );
+
+    /* Read XML Document from string
+       @param filename file name will be shown in error messages
+       @param source xml source code string */
+    extern xmldocument parse(std::string filename, std::string source);
+}
+
+#endif // CODERS_XML_H_
diff --git a/src/util/stringutil.cpp b/src/util/stringutil.cpp
index 19bc104d..74d6f4b8 100644
--- a/src/util/stringutil.cpp
+++ b/src/util/stringutil.cpp
@@ -252,3 +252,24 @@ std::vector util::base64_decode(const char* str, size_t size) {
 std::vector util::base64_decode(const std::string& str) {
     return base64_decode(str.c_str(), str.size());
 }
+
+// Replace every occurrence of `from` in `str` with `to`, in place, scanning left to right.
+// Text inserted by a replacement is never rescanned. An empty `from` matches nothing.
+// Returns the number of replacements performed.
+int util::replaceAll(std::string& str, const std::string& from, const std::string& to) {
+    // An empty pattern is found at every offset, so the scan would never finish.
+    if (from.empty())
+        return 0;
+    int count = 0;
+    size_t offset = 0;
+    while (true) {
+        size_t start_pos = str.find(from, offset);
+        if(start_pos == std::string::npos)
+            break;
+        str.replace(start_pos, from.length(), to);
+        // Continue after the inserted text so a `to` that contains `from` cannot loop.
+        offset = start_pos + to.length();
+        count++;
+    }
+    return count;
+}
diff --git a/src/util/stringutil.h b/src/util/stringutil.h
index 7944b9d8..e088ecfb 100644
--- a/src/util/stringutil.h
+++ b/src/util/stringutil.h
@@ -26,6 +26,9 @@ namespace util {
     extern std::string base64_encode(const ubyte* data, size_t size);
     extern std::vector base64_decode(const char* str, size_t size);
     extern std::vector base64_decode(const std::string& str);
+
+    /* Replace all occurrences of `from` in `str` with `to`; returns the number of replacements */
+    extern int replaceAll(std::string& str, const std::string& from, const std::string& to);
 }
 
 #endif // UTIL_STRINGUTIL_H_
\ No newline at end of file