From 5253be6c56227fda8b3cb315e444921f3670b19f Mon Sep 17 00:00:00 2001 From: MihailRis Date: Sun, 13 Apr 2025 00:19:38 +0300 Subject: [PATCH] feat: custom languages syntax (WIP) --- res/devtools/syntax/glsl.toml | 28 ++++ res/devtools/syntax/lua.toml | 12 ++ res/layouts/console.xml | 1 - res/layouts/console.xml.lua | 1 + src/coders/BasicParser.inl | 3 + src/coders/syntax_parser.cpp | 122 +++++++++++++----- src/coders/syntax_parser.hpp | 18 ++- src/devtools/Editor.cpp | 29 +++++ src/devtools/Editor.hpp | 23 ++++ ...x_highlighting.cpp => SyntaxProcessor.cpp} | 30 +++-- src/devtools/SyntaxProcessor.hpp | 29 +++++ src/devtools/syntax_highlighting.hpp | 16 --- src/engine/Engine.cpp | 3 + src/engine/Engine.hpp | 9 ++ src/graphics/ui/GUI.cpp | 4 + src/graphics/ui/GUI.hpp | 5 + src/graphics/ui/elements/TextBox.cpp | 6 +- test/coders/lua_parsing.cpp | 27 ---- 18 files changed, 278 insertions(+), 88 deletions(-) create mode 100644 res/devtools/syntax/glsl.toml create mode 100644 res/devtools/syntax/lua.toml create mode 100644 src/devtools/Editor.cpp create mode 100644 src/devtools/Editor.hpp rename src/devtools/{syntax_highlighting.cpp => SyntaxProcessor.cpp} (76%) create mode 100644 src/devtools/SyntaxProcessor.hpp delete mode 100644 src/devtools/syntax_highlighting.hpp delete mode 100644 test/coders/lua_parsing.cpp diff --git a/res/devtools/syntax/glsl.toml b/res/devtools/syntax/glsl.toml new file mode 100644 index 00000000..a2acb4d2 --- /dev/null +++ b/res/devtools/syntax/glsl.toml @@ -0,0 +1,28 @@ +language = "GLSL" +extensions = ["glsl", "glslv", "glslf"] +line-comment = "//" +multiline-comment-start = "/*" +multiline-comment-end = "*/" +keywords = [ + "attribute", "break", "bvec2", "bvec3", "bvec4", "centroid", "continue", + "discard", "dmat2", "dmat2x2", "dmat2x3", "dmat2x4", "dmat3", "dmat3x2", + "dmat3x3", "dmat3x4", "dmat4", "dmat4x2", "dmat4x3", "dmat4x4", "dvec2", + "dvec3", "dvec4", "else", "flat", "float", "highp", "if", "in", "inout", + "int", "invariant", "isampler1D", "isampler1DArray", "isampler2D", + "isampler2DArray", "isampler2DMS", "isampler2DMSArray", "isampler2DRect", + "isampler3D", "isamplerBuffer", "isamplerCube", "isamplerCubeArray", + "ivec2", "ivec3", "ivec4", "layout", "lowp", "mat2", "mat2x2", "mat2x3", + "mat2x4", "mat3", "mat3x2", "mat3x3", "mat3x4", "mat4", "mat4x2", "mat4x3", + "mat4x4", "mediump", "noperspective", "out", "patch", "precision", "return", + "sample", "sampler1D", "sampler1DArray", "sampler1DArrayShadow", + "sampler1DShadow", "sampler2D", "sampler2DArray", "sampler2DArrayShadow", + "sampler2DMS", "sampler2DMSArray", "sampler2DRect", "sampler2DRectShadow", + "sampler2DShadow", "sampler3D", "samplerBuffer", "samplerCube", + "samplerCubeArray", "samplerCubeArrayShadow", "samplerCubeShadow", "smooth", + "subroutine", "uniform", "usampler1D", "usampler1DArray", "usampler2D", + "usampler2DArray", "usampler2DMS", "usampler2DMSArray", "usampler2DRect", + "usampler3D", "usamplerBuffer", "usamplerCube", "usamplerCubeArray", + "uvec2", "uvec3", "uvec4", "varying", "vec2", "vec3", "vec4", "void", + "while", +] + diff --git a/res/devtools/syntax/lua.toml b/res/devtools/syntax/lua.toml new file mode 100644 index 00000000..cbccce75 --- /dev/null +++ b/res/devtools/syntax/lua.toml @@ -0,0 +1,12 @@ +language = "Lua" +extensions = ["lua"] +line-comment = "--" +multiline-comment-start = "[==[" +multiline-comment-end = "]==]" +multiline-string-start = "[[" +multiline-string-end = "]]" +keywords = [ + "and", "break", "do", "else", "elseif", "end", "false", "for", "function", + "if", "in", "local", "nil", "not", "or", "repeat", "return", "then", "true", + "until", "while" +] diff --git a/res/layouts/console.xml b/res/layouts/console.xml index 9d015aab..bbfdcf7a 100644 --- a/res/layouts/console.xml +++ b/res/layouts/console.xml @@ -74,7 +74,6 @@ multiline='true' line-numbers='true' oncontrolkey='on_control_combination' - syntax='lua' size-func="-1,40" text-wrap='false' scroll-step='50' diff --git a/res/layouts/console.xml.lua b/res/layouts/console.xml.lua index 695408e1..c3fee31d 100644 --- a/res/layouts/console.xml.lua +++ b/res/layouts/console.xml.lua @@ -197,6 +197,7 @@ function open_file_in_editor(filename, line, mutable) editor.scroll = 0 editor.text = source editor.focused = true + editor.syntax = file.ext(filename) if line then time.post_runnable(function() editor.caret = editor:linePos(line) diff --git a/src/coders/BasicParser.inl b/src/coders/BasicParser.inl index 7ed9c10d..1bbe1278 100644 --- a/src/coders/BasicParser.inl +++ b/src/coders/BasicParser.inl @@ -166,6 +166,9 @@ size_t BasicParser::remain() const { template bool BasicParser::isNext(const std::basic_string& substring) { + if (substring.empty()) { + return false; + } if (source.length() - pos < substring.length()) { return false; } diff --git a/src/coders/syntax_parser.cpp b/src/coders/syntax_parser.cpp index fb26f622..4b56b607 100644 --- a/src/coders/syntax_parser.cpp +++ b/src/coders/syntax_parser.cpp @@ -2,26 +2,73 @@ #include +#include "data/dv.hpp" +#include "util/stringutil.hpp" #include "BasicParser.hpp" using namespace devtools; -static std::set keywords { - L"and", L"break", L"do", L"else", L"elseif", L"end", L"false", L"for", L"function", - L"if", L"in", L"local", L"nil", L"not", L"or", L"repeat", L"return", L"then", L"true", - L"until", L"while" -}; +dv::value Syntax::serialize() const { + auto map = dv::object(); + map["language"] = language; + map["line-comment"] = util::wstr2str_utf8(lineComment); + map["multiline-comment-start"] = util::wstr2str_utf8(multilineCommentStart); + map["multiline-comment-end"] = util::wstr2str_utf8(multilineCommentEnd); + map["multiline-string-start"] = util::wstr2str_utf8(multilineStringStart); + map["multiline-string-end"] = util::wstr2str_utf8(multilineStringEnd); -static bool is_lua_keyword(std::wstring_view view) { - return keywords.find(view) != keywords.end(); + auto& extsList = map.list("extensions"); + for (const auto& ext : extensions) { + extsList.add(ext); + } + + auto& keywordsList = map.list("keywords"); + for (const auto& keyword : keywords) { + keywordsList.add(util::wstr2str_utf8(keyword)); + } + return map; } -inline bool is_lua_identifier_start(int c) { +void Syntax::deserialize(const dv::value& src) { + src.at("language").get(language); + + std::string lineComment; + std::string multilineCommentStart; + std::string multilineCommentEnd; + std::string multilineStringStart; + std::string multilineStringEnd; + src.at("line-comment").get(lineComment); + src.at("multiline-comment-start").get(multilineCommentStart); + src.at("multiline-comment-end").get(multilineCommentEnd); + src.at("multiline-string-start").get(multilineStringStart); + src.at("multiline-string-end").get(multilineStringEnd); + this->lineComment = util::str2wstr_utf8(lineComment); + this->multilineCommentStart = util::str2wstr_utf8(multilineCommentStart); + this->multilineCommentEnd = util::str2wstr_utf8(multilineCommentEnd); + this->multilineStringStart = util::str2wstr_utf8(multilineStringStart); + this->multilineStringEnd = util::str2wstr_utf8(multilineStringEnd); + + if (src.has("extensions")) { + const auto& extsList = src["extensions"]; + for (const auto& ext : extsList) { + extensions.insert(ext.asString()); + } + } + + if (src.has("keywords")) { + const auto& keywordsList = src["keywords"]; + for (const auto& keyword : keywordsList) { + keywords.insert(util::str2wstr_utf8(keyword.asString())); + } + } +} + +inline bool is_common_identifier_start(int c) { return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_'; } -inline bool is_lua_identifier_part(int c) { - return is_lua_identifier_start(c) || is_digit(c); +inline bool is_common_identifier_part(int c) { + return is_common_identifier_start(c) || is_digit(c); } inline bool is_lua_operator_start(int c) { @@ -31,10 +78,13 @@ inline bool is_lua_operator_start(int c) { } class Tokenizer : BasicParser { + const Syntax& syntax; std::vector tokens; public: - Tokenizer(std::string_view file, std::wstring_view source) - : BasicParser(file, source) { + Tokenizer( + const Syntax& syntax, std::string_view file, std::wstring_view source + ) + : BasicParser(file, source), syntax(syntax) { } std::wstring parseLuaName() { @@ -110,9 +160,12 @@ public: } wchar_t c = peek(); auto start = currentLocation(); - if (is_lua_identifier_start(c)) { + if (is_common_identifier_start(c)) { auto name = parseLuaName(); - TokenTag tag = (is_lua_keyword(name) ? TokenTag::KEYWORD : TokenTag::NAME); + TokenTag tag = + (syntax.keywords.find(name) == syntax.keywords.end() + ? TokenTag::NAME + : TokenTag::KEYWORD); emitToken( tag, std::move(name), @@ -132,24 +185,29 @@ public: emitToken(tag, std::wstring(literal), start); continue; } + const auto& mcommentStart = syntax.multilineCommentStart; + if (!mcommentStart.empty() && c == mcommentStart[0] && + isNext(syntax.multilineCommentStart)) { + auto string = readUntil(syntax.multilineCommentEnd, true); + skip(syntax.multilineCommentEnd.length()); + emitToken( + TokenTag::COMMENT, + std::wstring(string) + syntax.multilineCommentEnd, + start + ); + continue; + } + const auto& mstringStart = syntax.multilineStringStart; + if (!mstringStart.empty() && c == mstringStart[0] && + isNext(syntax.multilineStringStart)) { + skip(mstringStart.length()); + auto string = readUntil(syntax.multilineStringEnd, true); + skip(syntax.multilineStringEnd.length()); + emitToken(TokenTag::STRING, std::wstring(string), start); + continue; + } switch (c) { case '(': case '[': case '{': - if (isNext(L"[==[")) { - auto string = readUntil(L"]==]", true); - skip(4); - emitToken( - TokenTag::COMMENT, - std::wstring(string) + L"]==]", - start - ); - continue; - } else if (isNext(L"[[")) { - skip(2); - auto string = readUntil(L"]]", true); - skip(2); - emitToken(TokenTag::STRING, std::wstring(string), start); - continue; - } emitToken(TokenTag::OPEN_BRACKET, std::wstring({c}), start, true); continue; case ')': case ']': case '}': @@ -188,7 +246,7 @@ public: }; std::vector devtools::tokenize( - std::string_view file, std::wstring_view source + const Syntax& syntax, std::string_view file, std::wstring_view source ) { - return Tokenizer(file, source).tokenize(); + return Tokenizer(syntax, file, source).tokenize(); } diff --git a/src/coders/syntax_parser.hpp b/src/coders/syntax_parser.hpp index d8166a0a..c7bd254c 100644 --- a/src/coders/syntax_parser.hpp +++ b/src/coders/syntax_parser.hpp @@ -1,12 +1,28 @@ #pragma once +#include #include #include #include "devtools/syntax.hpp" +#include "interfaces/Serializable.hpp" namespace devtools { + struct Syntax : Serializable { + std::string language; + std::set extensions; + std::set keywords; + std::wstring lineComment; + std::wstring multilineCommentStart; + std::wstring multilineCommentEnd; + std::wstring multilineStringStart; + std::wstring multilineStringEnd; + + dv::value serialize() const override; + void deserialize(const dv::value& src) override; + }; + std::vector tokenize( - std::string_view file, std::wstring_view source + const Syntax& syntax, std::string_view file, std::wstring_view source ); } diff --git a/src/devtools/Editor.cpp b/src/devtools/Editor.cpp new file mode 100644 index 00000000..82475ee4 --- /dev/null +++ b/src/devtools/Editor.cpp @@ -0,0 +1,29 @@ +#include "Editor.hpp" + +#include "engine/Engine.hpp" +#include "io/engine_paths.hpp" +#include "coders/syntax_parser.hpp" +#include "SyntaxProcessor.hpp" + +using namespace devtools; + +Editor::Editor(Engine& engine) + : engine(engine), syntaxProcessor(std::make_unique()) { +} + +Editor::~Editor() = default; + +void Editor::loadTools() { + const auto& paths = engine.getResPaths(); + auto files = paths.listdir("devtools/syntax"); + for (const auto& file : files) { + auto config = io::read_object(file); + auto syntax = std::make_unique(); + syntax->deserialize(config); + syntaxProcessor->addSyntax(std::move(syntax)); + } +} + +SyntaxProcessor& Editor::getSyntaxProcessor() { + return *syntaxProcessor; +} diff --git a/src/devtools/Editor.hpp b/src/devtools/Editor.hpp new file mode 100644 index 00000000..8b79dd54 --- /dev/null +++ b/src/devtools/Editor.hpp @@ -0,0 +1,23 @@ +#pragma once + +#include +#include + +class Engine; + +namespace devtools { + class SyntaxProcessor; + + class Editor { + public: + Editor(Engine& engine); + ~Editor(); + + void loadTools(); + + SyntaxProcessor& getSyntaxProcessor(); + private: + Engine& engine; + std::unique_ptr syntaxProcessor; + }; +} diff --git a/src/devtools/syntax_highlighting.cpp b/src/devtools/SyntaxProcessor.cpp similarity index 76% rename from src/devtools/syntax_highlighting.cpp rename to src/devtools/SyntaxProcessor.cpp index 748f9aaa..a4a1c7cf 100644 --- a/src/devtools/syntax_highlighting.cpp +++ b/src/devtools/SyntaxProcessor.cpp @@ -1,4 +1,4 @@ -#include "syntax_highlighting.hpp" +#include "SyntaxProcessor.hpp" #include "coders/commons.hpp" #include "coders/syntax_parser.hpp" @@ -55,16 +55,28 @@ static std::unique_ptr build_styles( return std::make_unique(std::move(styles)); } -std::unique_ptr devtools::syntax_highlight( - const std::string& lang, std::wstring_view source +void SyntaxProcessor::addSyntax( + std::unique_ptr syntax ) { + const auto ptr = syntax.get(); + langs.emplace_back(std::move(syntax)); + + for (auto& ext : ptr->extensions) { + langsExtensions[ext] = ptr; + } +} + +std::unique_ptr SyntaxProcessor::highlight( + const std::string& ext, std::wstring_view source +) const { + const auto& found = langsExtensions.find(ext); + if (found == langsExtensions.end()) { + return nullptr; + } + const auto& syntax = *found->second; try { - if (lang == "lua") { - auto tokens = tokenize("", source); - return build_styles(tokens); - } else { - return nullptr; - } + auto tokens = tokenize(syntax, "", source); + return build_styles(tokens); } catch (const parsing_error& err) { return nullptr; } diff --git a/src/devtools/SyntaxProcessor.hpp b/src/devtools/SyntaxProcessor.hpp new file mode 100644 index 00000000..afb975a8 --- /dev/null +++ b/src/devtools/SyntaxProcessor.hpp @@ -0,0 +1,29 @@ +#pragma once + +#include +#include +#include +#include +#include + +struct FontStylesScheme; + +namespace devtools { + struct Syntax; + + enum SyntaxStyles { + DEFAULT, KEYWORD, LITERAL, COMMENT, ERROR + }; + + class SyntaxProcessor { + public: + std::unique_ptr highlight( + const std::string& ext, std::wstring_view source + ) const; + + void addSyntax(std::unique_ptr syntax); + private: + std::vector> langs; + std::unordered_map langsExtensions; + }; +} diff --git a/src/devtools/syntax_highlighting.hpp b/src/devtools/syntax_highlighting.hpp deleted file mode 100644 index e30b0084..00000000 --- a/src/devtools/syntax_highlighting.hpp +++ /dev/null @@ -1,16 +0,0 @@ -#pragma once - -#include -#include - -struct FontStylesScheme; - -namespace devtools { - enum SyntaxStyles { - DEFAULT, KEYWORD, LITERAL, COMMENT, ERROR - }; - - std::unique_ptr syntax_highlight( - const std::string& lang, std::wstring_view source - ); -} diff --git a/src/engine/Engine.cpp b/src/engine/Engine.cpp index 112ece7f..fb53978a 100644 --- a/src/engine/Engine.cpp +++ b/src/engine/Engine.cpp @@ -12,6 +12,7 @@ #include "coders/json.hpp" #include "coders/toml.hpp" #include "coders/commons.hpp" +#include "devtools/Editor.hpp" #include "content/ContentControl.hpp" #include "core_defs.hpp" #include "io/io.hpp" @@ -73,6 +74,7 @@ Engine& Engine::getInstance() { void Engine::initialize(CoreParameters coreParameters) { params = std::move(coreParameters); settingsHandler = std::make_unique(settings); + editor = std::make_unique(*this); cmd = std::make_unique(); network = network::Network::create(settings.network); @@ -134,6 +136,7 @@ void Engine::initialize(CoreParameters coreParameters) { ); } content = std::make_unique(paths, *input, [this]() { + editor->loadTools(); langs::setup(langs::get_current(), paths.resPaths.collectRoots()); if (!isHeadless()) { for (auto& pack : content->getAllContentPacks()) { diff --git a/src/engine/Engine.hpp b/src/engine/Engine.hpp index f352d12b..5e854ae3 100644 --- a/src/engine/Engine.hpp +++ b/src/engine/Engine.hpp @@ -33,6 +33,10 @@ namespace network { class Network; } +namespace devtools { + class Editor; +} + class initialize_error : public std::runtime_error { public: initialize_error(const std::string& message) : std::runtime_error(message) {} @@ -63,6 +67,7 @@ class Engine : public util::ObjectsKeeper { std::unique_ptr window; std::unique_ptr input; std::unique_ptr gui; + std::unique_ptr editor; PostRunnables postRunnables; Time time; OnWorldOpen levelConsumer; @@ -161,4 +166,8 @@ public: cmd::CommandsInterpreter& getCmd() { return *cmd; } + + devtools::Editor& getEditor() { + return *editor; + } }; diff --git a/src/graphics/ui/GUI.cpp b/src/graphics/ui/GUI.cpp index 79883136..828315e6 100644 --- a/src/graphics/ui/GUI.cpp +++ b/src/graphics/ui/GUI.cpp @@ -364,3 +364,7 @@ Input& GUI::getInput() { Window& GUI::getWindow() { return engine.getWindow(); } + +devtools::Editor& GUI::getEditor() { + return engine.getEditor(); +} diff --git a/src/graphics/ui/GUI.hpp b/src/graphics/ui/GUI.hpp index ca8f5508..b5c70bdd 100644 --- a/src/graphics/ui/GUI.hpp +++ b/src/graphics/ui/GUI.hpp @@ -18,6 +18,10 @@ class Engine; class Input; class Window; +namespace devtools { + class Editor; +} + /* Some info about padding and margin. Padding is element inner space, margin is outer @@ -159,5 +163,6 @@ namespace gui { const Input& getInput() const; Input& getInput(); Window& getWindow(); + devtools::Editor& getEditor(); }; } diff --git a/src/graphics/ui/elements/TextBox.cpp b/src/graphics/ui/elements/TextBox.cpp index 3ab4306c..f271cc23 100644 --- a/src/graphics/ui/elements/TextBox.cpp +++ b/src/graphics/ui/elements/TextBox.cpp @@ -8,7 +8,8 @@ #include "../markdown.hpp" #include "Label.hpp" #include "assets/Assets.hpp" -#include "devtools/syntax_highlighting.hpp" +#include "devtools/Editor.hpp" +#include "devtools/SyntaxProcessor.hpp" #include "engine/Engine.hpp" #include "graphics/core/Batch2D.hpp" #include "graphics/core/DrawContext.hpp" @@ -811,7 +812,8 @@ void TextBox::stepDefaultUp(bool shiftPressed, bool breakSelection) { void TextBox::refreshSyntax() { if (!syntax.empty()) { - if (auto styles = devtools::syntax_highlight(syntax, input)) { + const auto& processor = gui.getEditor().getSyntaxProcessor(); + if (auto styles = processor.highlight(syntax, input)) { label->setStyles(std::move(styles)); } } diff --git a/test/coders/lua_parsing.cpp b/test/coders/lua_parsing.cpp deleted file mode 100644 index 2bc547c4..00000000 --- a/test/coders/lua_parsing.cpp +++ /dev/null @@ -1,27 +0,0 @@ -#include "coders/syntax_parser.hpp" - -#include - -#include "coders/commons.hpp" -#include "io/io.hpp" -#include "io/devices/StdfsDevice.hpp" -#include "util/stringutil.hpp" - -namespace fs = std::filesystem; - -TEST(lua_parsing, Tokenizer) { - io::set_device("res", std::make_shared(fs::u8path("../../res"))); - auto filename = "res:scripts/stdlib.lua"; - auto source = io::read_string(filename); - try { - auto tokens = devtools::tokenize(filename, util::str2wstr_utf8(source)); - for (const auto& token : tokens) { - std::cout << (int)token.tag << " " - << util::quote(util::wstr2str_utf8(token.text)) - << std::endl; - } - } catch (const parsing_error& err) { - std::cerr << err.errorLog() << std::endl; - throw err; - } -}