Added XML parser/writer

This commit is contained in:
MihailRis 2024-02-01 02:01:04 +03:00
parent bfe2e2557b
commit 1f11fa8fea
6 changed files with 527 additions and 0 deletions

View File

@ -98,6 +98,18 @@ void BasicParser::skipWhitespace() {
}
}
/* Advance the cursor by up to n characters (clamped to what is left of
   the source), updating line/linestart for every newline passed.
   @param n maximum number of characters to consume */
void BasicParser::skip(size_t n) {
    size_t remaining = std::min(n, source.length()-pos);
    while (remaining > 0) {
        --remaining;
        const char consumed = source[pos++];
        if (consumed != '\n') {
            continue;
        }
        // the next line starts right after the consumed '\n'
        line++;
        linestart = pos;
    }
}
void BasicParser::skipLine() {
while (hasNext()) {
if (source[pos] == '\n') {
@ -110,10 +122,28 @@ void BasicParser::skipLine() {
}
}
/* Move the cursor to the next occurrence of substring at or after the
   current position. If there is none, everything left in the source is skipped.
   @param substring text to search for
   @return true if substring was found (the cursor then points at its start) */
bool BasicParser::skipTo(const std::string& substring) {
    const size_t idx = source.find(substring, pos);
    const bool found = (idx != std::string::npos);
    const size_t target = found ? idx : source.length();
    skip(target - pos);
    return found;
}
/* @return true while the cursor is still inside the source */
bool BasicParser::hasNext() {
    return source.length() > pos;
}
/* Check whether the source continues with substring at the cursor.
   Nothing is consumed.
   @param substring text expected at the current position */
bool BasicParser::isNext(const std::string& substring) {
    const size_t len = substring.length();
    // not enough characters left to hold the substring
    if (source.length() - pos < len) {
        return false;
    }
    return source.compare(pos, len, substring) == 0;
}
char BasicParser::nextChar() {
if (!hasNext()) {
throw error("unexpected end");
@ -129,6 +159,17 @@ void BasicParser::expect(char expected) {
pos++;
}
/* Require the source to continue with substring at the cursor and consume it.
   An empty substring is accepted and leaves the cursor untouched.
   The line counter is not updated here.
   @param substring text that must follow
   @throws error(...) with "<escaped substring> expected" on a mismatch
   or when the source ends before the whole substring is matched */
void BasicParser::expect(const std::string& substring) {
    const size_t len = substring.length();
    size_t matched = 0;
    while (matched < len
        && pos + matched < source.length()
        && source[pos + matched] == substring[matched]) {
        ++matched;
    }
    if (matched != len) {
        throw error(escape_string(substring)+" expected");
    }
    pos += len;
}
void BasicParser::expectNewLine() {
while (hasNext()) {
char next = source[pos];
@ -145,6 +186,10 @@ void BasicParser::expectNewLine() {
}
}
/* Step the cursor one character back; does nothing when it is already at 0. */
void BasicParser::goBack() {
    if (pos != 0) {
        --pos;
    }
}
char BasicParser::peek() {
skipWhitespace();
if (pos >= source.length()) {

View File

@ -70,12 +70,17 @@ protected:
uint linestart = 0;
virtual void skipWhitespace();
void skip(size_t n);
void skipLine();
bool skipTo(const std::string& substring);
void expect(char expected);
void expect(const std::string& substring);
char peek();
char nextChar();
bool hasNext();
bool isNext(const std::string& substring);
void expectNewLine();
void goBack();
std::string parseName();
int64_t parseSimpleInt(int base);

328
src/coders/xml.cpp Normal file
View File

@ -0,0 +1,328 @@
#include "xml.h"
#include <sstream>
#include <stdexcept>
#include <utility>
#include "../util/stringutil.h"
using namespace xml;
/* Create an attribute from its name and raw text value.
   Both strings are taken by value and moved into the members,
   so passing temporaries costs no extra copy.
   @param name attribute name
   @param text attribute text */
Attribute::Attribute(std::string name, std::string text)
    : name(std::move(name)),
      text(std::move(text)) {
}
const std::string& Attribute::getName() const {
return name;
}
const std::string& Attribute::getText() const {
return text;
}
/* Convert the attribute text to an integer with std::stoll.
   @throws std::invalid_argument, std::out_of_range as raised by std::stoll */
int64_t Attribute::asInt() const {
    const long long parsed = std::stoll(text);
    return parsed;
}
/* Convert the attribute text to a double with std::stod.
   @throws std::invalid_argument, std::out_of_range as raised by std::stod */
double Attribute::asFloat() const {
    const double parsed = std::stod(text);
    return parsed;
}
/* @return true only when the text is exactly "true" or "1" */
bool Attribute::asBool() const {
    if (text == "true") {
        return true;
    }
    return text == "1";
}
/* Create an element with the given tag and no attributes or sub-elements.
   The tag is taken by value and moved into the member.
   @param tag element tag */
Node::Node(std::string tag) : tag(std::move(tag)) {
}
/* Append a sub-element to the end of the children list.
   The shared pointer is moved into the list, so no extra
   reference-count update is made for the by-value parameter.
   @param element sub-element to append */
void Node::add(xmlelement element) {
    elements.push_back(std::move(element));
}
void Node::set(std::string name, std::string text) {
attrs.insert_or_assign(name, Attribute(name, text));
}
const std::string& Node::getTag() const {
return tag;
}
/* Look up an attribute that must be present.
   @param name attribute name
   @throws std::runtime_error if the element has no such attribute
   @return copy of the stored attribute */
const xmlattribute Node::attr(const std::string& name) const {
    const auto entry = attrs.find(name);
    if (entry != attrs.end()) {
        return entry->second;
    }
    throw std::runtime_error("element <"+tag+" ...> missing attribute "+name);
}
/* Look up an attribute, falling back to a default text.
   @param name attribute name
   @param def text used when the attribute is absent
   @return copy of the stored attribute, or {name, def} if not found */
const xmlattribute Node::attr(const std::string& name, const std::string& def) const {
    const auto entry = attrs.find(name);
    if (entry != attrs.end()) {
        return entry->second;
    }
    return Attribute(name, def);
}
/* @param name attribute name
   @return true if the element has an attribute with this name */
bool Node::has(const std::string& name) const {
    return attrs.count(name) != 0;
}
/* Get a sub-element by index.
   @throws std::out_of_range if index is not a valid position */
xmlelement Node::sub(size_t index) {
    const xmlelement& child = elements.at(index);
    return child;
}
/* @return number of direct sub-elements */
size_t Node::size() const {
    const size_t count = elements.size();
    return count;
}
const std::vector<xmlelement>& Node::getElements() const {
return elements;
}
/* @return read-only view of the attribute map (name -> attribute) */
const xmlelements_map& Node::getAttributes() const {
    return this->attrs;
}
/* Create a document without a root element.
   Both strings are taken by value and moved into the members.
   @param version XML version string
   @param encoding encoding name */
Document::Document(std::string version, std::string encoding)
    : version(std::move(version)),
      encoding(std::move(encoding)) {
}
/* Replace the document root.
   The shared pointer is moved into the member instead of being copied.
   @param element new root element */
void Document::setRoot(xmlelement element) {
    root = std::move(element);
}
/* @return root element (the value last passed to setRoot) */
xmlelement Document::getRoot() const {
    return this->root;
}
const std::string& Document::getVersion() const {
return version;
}
const std::string& Document::getEncoding() const {
return encoding;
}
/* Create a parser over an XML source string.
   Both arguments are forwarded to the BasicParser constructor.
   @param filename file name handed to BasicParser
   @param source XML text to parse */
Parser::Parser(std::string filename, std::string source)
    : BasicParser(filename, source)
{
}
/* Parse a tag name followed by its attribute list.
   Stops, without consuming it, at the first '/', '>' or '?'.
   An attribute without '=' gets an empty text; a value must start
   with a double quote.
   @return new element carrying the parsed tag and attributes */
xmlelement Parser::parseOpenTag() {
    const std::string tagName = parseName();
    auto node = std::make_shared<Node>(tagName);
    for (;;) {
        skipWhitespace();
        const char upcoming = peek();
        if (upcoming == '/' || upcoming == '>' || upcoming == '?') {
            return node;
        }
        std::string attrname = parseName();
        std::string attrtext;
        skipWhitespace();
        if (peek() == '=') {
            nextChar();
            skipWhitespace();
            expect('"');
            attrtext = parseString('"');
        }
        node->set(attrname, attrtext);
    }
}
void Parser::parseDeclaration() {
std::string version = "1.0";
std::string encoding = "UTF-8";
expect('<');
if (peek() == '?') {
nextChar();
xmlelement node = parseOpenTag();
expect("?>");
if (node->getTag() != "xml") {
throw error("invalid declaration");
}
version = node->attr("version", version).getText();
encoding = node->attr("encoding", encoding).getText();
if (encoding != "utf-8" && encoding != "UTF-8") {
throw error("UTF-8 encoding is only supported");
}
} else {
goBack();
}
document = std::make_shared<Document>(version, encoding);
}
/* Consume a comment body: expects "!--" at the cursor (the '<' is
   already consumed) and skips everything up to and including "-->".
   @throws error(...) if the closing "-->" is missing */
void Parser::parseComment() {
    expect("!--");
    if (!skipTo("-->")) {
        throw error("comment close missing");
    }
    // step over the closing "-->" itself
    skip(3);
}
/* Read raw character data from the cursor up to (not including) the
   next '<' or the end of the source.
   @return the text that was passed over, entities left untouched */
std::string Parser::parseText() {
    const size_t start = pos;
    while (hasNext() && peek() != '<') {
        nextChar();
    }
    return source.substr(start, pos-start);
}
/* Parse one item at the cursor: a text run, a comment or an element
   with all of its children.
   @return text element (tag "#", text in attribute "#"), a regular
   element, or nullptr when a comment was consumed
   @throws error(...) on a DOCTYPE, or when the open tag is followed by
   anything other than "/>" or ">" */
xmlelement Parser::parseElement() {
    // text element
    if (peek() != '<') {
        // "&amp;" is decoded last so its output is not decoded again
        static const char* const ENTITIES[][2] = {
            {"&quot;", "\""},
            {"&apos;", "'"},
            {"&lt;", "<"},
            {"&gt;", ">"},
            {"&amp;", "&"},
        };
        auto textNode = std::make_shared<Node>("#");
        auto text = parseText();
        for (const auto& entity : ENTITIES) {
            util::replaceAll(text, entity[0], entity[1]);
        }
        textNode->set("#", text);
        return textNode;
    }
    nextChar();

    // <!--element-->
    if (peek() == '!') {
        if (isNext("!DOCTYPE ")) {
            throw error("XML DTD is not supported yet");
        }
        parseComment();
        return nullptr;
    }

    auto element = parseOpenTag();
    switch (nextChar()) {
        // <element/>
        case '/':
            expect('>');
            break;
        // <element>...</element>
        case '>':
            skipWhitespace();
            while (!isNext("</")) {
                auto child = parseElement();
                // comments come back as nullptr and are dropped
                if (child) {
                    element->add(child);
                }
                skipWhitespace();
            }
            skip(2);
            expect(element->getTag());
            expect('>');
            break;
        // <element?>
        default:
            throw error("invalid syntax");
    }
    return element;
}
/* Parse the whole source into a document.
   Reads the optional declaration, then the root element. Comments that
   precede the root are skipped: parseElement() returns nullptr for a
   comment, which must not end up as the document root.
   @throws error(...) if the source ends before any root element is found
   @return parsed document with its root set */
xmldocument Parser::parse() {
    parseDeclaration();
    xmlelement root = nullptr;
    while (!root) {
        skipWhitespace();
        if (!hasNext()) {
            throw error("root element expected");
        }
        root = parseElement();
    }
    document->setRoot(root);
    return document;
}
/* Read an XML document from a string.
   Both by-value arguments are moved into the parser, so the
   source text is not copied a second time.
   @param filename file name handed to the parser
   @param source XML source text
   @return parsed document */
xmldocument xml::parse(std::string filename, std::string source) {
    Parser parser(std::move(filename), std::move(source));
    return parser.parse();
}
/* Start a new indented line in human-readable mode.
   Writes '\n' followed by indentStr repeated indent times;
   writes nothing at all when nice is false.
   @param ss output stream
   @param nice human-readable mode flag
   @param indentStr one level of indentation
   @param indent number of indentation levels */
inline void newline(
    std::stringstream& ss,
    bool nice,
    const std::string& indentStr,
    int indent
) {
    if (nice) {
        ss << '\n';
        for (int level = indent; level > 0; --level) {
            ss << indentStr;
        }
    }
}
/* Write one element and all of its descendants to the stream.
   A text element is written as its entity-escaped text. Any other element
   is written as an open tag with attributes, followed by either "/>"
   (no children), its single text child inline, or its children on
   separate lines (when nice) and a closing tag.
   The element and each attribute are taken by reference to avoid copying
   the shared pointer and the attribute strings.
   @param ss output stream
   @param element element to write (must not be null)
   @param nice human-readable mode flag
   @param indentStr one level of indentation
   @param indent current indentation level */
static void stringifyElement(
    std::stringstream& ss,
    const xmlelement& element,
    bool nice,
    const std::string& indentStr,
    int indent
) {
    if (element->isText()) {
        std::string text = element->attr("#").getText();
        // '&' must be escaped first so the other entities are not re-escaped
        util::replaceAll(text, "&", "&amp;");
        util::replaceAll(text, "\"","&quot;");
        util::replaceAll(text, "'", "&apos;");
        util::replaceAll(text, "<", "&lt;");
        util::replaceAll(text, ">", "&gt;");
        ss << text;
        return;
    }
    const std::string& tag = element->getTag();
    ss << '<' << tag;

    const auto& attrs = element->getAttributes();
    if (!attrs.empty()) {
        ss << ' ';
        bool first = true;
        for (const auto& entry : attrs) {
            // single space between attributes, none after the last
            if (!first) {
                ss << " ";
            }
            first = false;
            const auto& attr = entry.second;
            ss << attr.getName();
            // attributes with empty text are written as a bare name
            if (!attr.getText().empty()) {
                ss << "=" << escape_string(attr.getText());
            }
        }
    }

    const auto& elements = element->getElements();
    // a lone text child stays on the same line as its tags
    if (elements.size() == 1 && elements[0]->isText()) {
        ss << ">";
        stringifyElement(ss, elements[0], nice, indentStr, indent+1);
        ss << "</" << tag << ">";
        return;
    }
    if (!elements.empty()) {
        ss << '>';
        for (const auto& sub : elements) {
            newline(ss, nice, indentStr, indent+1);
            stringifyElement(ss, sub, nice, indentStr, indent+1);
        }
        newline(ss, nice, indentStr, indent);
        ss << "</" << tag << ">";
    } else {
        ss << "/>";
    }
}
std::string xml::stringify(
const xmldocument document,
bool nice,
const std::string& indentStr
) {
std::stringstream ss;
// XML declaration
ss << "<?xml version=\"" << document->getVersion();
ss << "\" encoding=\"UTF-8\" ?>";
newline(ss, nice, indentStr, 0);
stringifyElement(ss, document->getRoot(), nice, indentStr, 0);
return ss.str();
}

132
src/coders/xml.h Normal file
View File

@ -0,0 +1,132 @@
#ifndef CODERS_XML_H_
#define CODERS_XML_H_
#include <string>
#include <memory>
#include <vector>
#include <unordered_map>
#include "commons.h"
namespace xml {
/* Forward declarations of the XML model classes */
class Node;
class Attribute;
class Document;
/* Attributes are stored in and returned from elements by value */
typedef Attribute xmlattribute;
/* Elements and documents are handled through shared pointers */
typedef std::shared_ptr<Node> xmlelement;
typedef std::shared_ptr<Document> xmldocument;
/* Attribute name -> attribute map (despite the 'elements' in its name) */
typedef std::unordered_map<std::string, xmlattribute> xmlelements_map;
/* Name/text pair describing a single attribute of an element */
class Attribute {
std::string name;
std::string text;
public:
Attribute(std::string name, std::string text);
/* Get attribute name */
const std::string& getName() const;
/* Get attribute text as stored */
const std::string& getText() const;
/* Convert attribute text to integer using std::stoll */
int64_t asInt() const;
/* Convert attribute text to double using std::stod */
double asFloat() const;
/* @return true only if text is exactly "true" or "1" */
bool asBool() const;
};
/* XML element class. Text element has tag '#' and keeps its text
in attribute '#' (see isText() and text()) */
class Node {
std::string tag;
std::unordered_map<std::string, xmlattribute> attrs;
std::vector<xmlelement> elements;
public:
Node(std::string tag);
/* Add sub-element */
void add(xmlelement element);
/* Set attribute value. Creates attribute if does not exists */
void set(std::string name, std::string text);
/* Get element tag */
const std::string& getTag() const;
/* Check if this is a text element (tag is "#") */
inline bool isText() const {
return getTag() == "#";
}
/* Get text of a text element (value of attribute "#")
@throws std::runtime_error if element has no "#" attribute */
inline const std::string& text() const {
// NOTE(review): attr() returns the attribute by value, so this reference
// appears to refer to a temporary's member - confirm before relying on it
return attr("#").getText();
}
/* Get attribute by name
@param name attribute name
@throws std::runtime_error if element has no attribute
@return xmlattribute - {name, value} */
const xmlattribute attr(const std::string& name) const;
/* Get attribute by name
@param name name
@param def default value will be returned wrapped in xmlattribute
if element has no attribute
@return xmlattribute - {name, value} or {name, def} if not found*/
const xmlattribute attr(const std::string& name, const std::string& def) const;
/* Check if element has attribute
@param name attribute name */
bool has(const std::string& name) const;
/* Get sub-element by index
@throws std::out_of_range if an invalid index given */
xmlelement sub(size_t index);
/* Get number of sub-elements */
size_t size() const;
/* Get all sub-elements */
const std::vector<xmlelement>& getElements() const;
/* Get all attributes as name -> attribute map */
const xmlelements_map& getAttributes() const;
};
/* XML document: version, encoding and a root element */
class Document {
/* Stays nullptr until setRoot is called */
xmlelement root = nullptr;
std::string version;
std::string encoding;
public:
Document(std::string version, std::string encoding);
/* Set document root element */
void setRoot(xmlelement element);
/* Get document root element (nullptr if it was never set) */
xmlelement getRoot() const;
/* Get XML version string */
const std::string& getVersion() const;
/* Get encoding name */
const std::string& getEncoding() const;
};
/* XML parser built on top of BasicParser */
class Parser : public BasicParser {
/* Document being built, created by parseDeclaration */
xmldocument document;
/* Parse tag name and attributes up to '/', '>' or '?' */
xmlelement parseOpenTag();
/* Parse text, comment or element; returns nullptr for a comment */
xmlelement parseElement();
/* Parse optional <?xml ... ?> declaration and create the document */
void parseDeclaration();
/* Skip a comment up to and including "-->" */
void parseComment();
/* Read raw text up to the next '<' */
std::string parseText();
public:
Parser(std::string filename, std::string source);
/* Parse the source and return the resulting document */
xmldocument parse();
};
/* Serialize XML Document to string.
The declaration is always written with encoding "UTF-8".
@param document serializing document
@param nice use human readable format
(with indents and line-separators)
@param indentStr indentation characters sequence
(default - 4 spaces)
NOTE(review): the default literal below appears to be a single space,
not 4 spaces - confirm the intended default
@return serialized document text */
extern std::string stringify(
const xmldocument document,
bool nice=true,
const std::string& indentStr=" "
);
/* Read XML Document from string
@param filename file name will be shown in error messages
@param source xml source code string
@return parsed document */
extern xmldocument parse(std::string filename, std::string source);
}
#endif // CODERS_XML_H_

View File

@ -252,3 +252,18 @@ std::vector<ubyte> util::base64_decode(const char* str, size_t size) {
std::vector<ubyte> util::base64_decode(const std::string& str) {
return base64_decode(str.c_str(), str.size());
}
/* Replace every non-overlapping occurrence of `from` in `str` with `to`,
   scanning left to right. Inserted text is never rescanned, so `to` may
   itself contain `from`.
   @param str string modified in place
   @param from substring to search for; an empty value matches nothing
   @param to replacement text
   @return number of replacements made */
int util::replaceAll(std::string& str, const std::string& from, const std::string& to) {
    // an empty pattern would match at every offset and never terminate
    if (from.empty()) {
        return 0;
    }
    int count = 0;
    size_t offset = 0;
    while ((offset = str.find(from, offset)) != std::string::npos) {
        str.replace(offset, from.length(), to);
        // continue the search right after the inserted text
        offset += to.length();
        count++;
    }
    return count;
}

View File

@ -26,6 +26,8 @@ namespace util {
extern std::string base64_encode(const ubyte* data, size_t size);
extern std::vector<ubyte> base64_decode(const char* str, size_t size);
extern std::vector<ubyte> base64_decode(const std::string& str);
extern int replaceAll(std::string& str, const std::string& from, const std::string& to);
}
#endif // UTIL_STRINGUTIL_H_