feat: custom languages syntax (WIP)

This commit is contained in:
MihailRis 2025-04-13 00:19:38 +03:00
parent 4360cd408b
commit 5253be6c56
18 changed files with 278 additions and 88 deletions

View File

@ -0,0 +1,28 @@
# Syntax description for GLSL shader sources.
language = "GLSL"
extensions = ["glsl", "glslv", "glslf"]
line-comment = "//"
multiline-comment-start = "/*"
multiline-comment-end = "*/"
# Reserved words of the OpenGL Shading Language, in alphabetical order.
# Includes the basic types, control-flow words and boolean literals
# ("bool", "const", "for", "struct", "switch", "true", ...) so that they are
# highlighted consistently with the rest of the list.
keywords = [
    "attribute", "bool", "break", "bvec2", "bvec3", "bvec4", "case",
    "centroid", "const", "continue", "default", "discard", "dmat2", "dmat2x2",
    "dmat2x3", "dmat2x4", "dmat3", "dmat3x2", "dmat3x3", "dmat3x4", "dmat4",
    "dmat4x2", "dmat4x3", "dmat4x4", "do", "double", "dvec2", "dvec3", "dvec4",
    "else", "false", "flat", "float", "for", "highp", "if", "in", "inout",
    "int", "invariant", "isampler1D", "isampler1DArray", "isampler2D",
    "isampler2DArray", "isampler2DMS", "isampler2DMSArray", "isampler2DRect",
    "isampler3D", "isamplerBuffer", "isamplerCube", "isamplerCubeArray",
    "ivec2", "ivec3", "ivec4", "layout", "lowp", "mat2", "mat2x2", "mat2x3",
    "mat2x4", "mat3", "mat3x2", "mat3x3", "mat3x4", "mat4", "mat4x2", "mat4x3",
    "mat4x4", "mediump", "noperspective", "out", "patch", "precision", "return",
    "sample", "sampler1D", "sampler1DArray", "sampler1DArrayShadow",
    "sampler1DShadow", "sampler2D", "sampler2DArray", "sampler2DArrayShadow",
    "sampler2DMS", "sampler2DMSArray", "sampler2DRect", "sampler2DRectShadow",
    "sampler2DShadow", "sampler3D", "samplerBuffer", "samplerCube",
    "samplerCubeArray", "samplerCubeArrayShadow", "samplerCubeShadow", "smooth",
    "struct", "subroutine", "switch", "true", "uint", "uniform", "usampler1D",
    "usampler1DArray", "usampler2D", "usampler2DArray", "usampler2DMS",
    "usampler2DMSArray", "usampler2DRect", "usampler3D", "usamplerBuffer",
    "usamplerCube", "usamplerCubeArray", "uvec2", "uvec3", "uvec4", "varying",
    "vec2", "vec3", "vec4", "void", "while",
]

View File

@ -0,0 +1,12 @@
# Syntax description applied by the editor to files with the extensions below.
language = "Lua"
extensions = ["lua"]
line-comment = "--"
# Text from the start marker through the end marker becomes one comment token.
multiline-comment-start = "[==["
multiline-comment-end = "]==]"
# Text between these markers becomes one string token.
multiline-string-start = "[["
multiline-string-end = "]]"
# Identifiers found in this list are tagged as keywords; all others are names.
keywords = [
"and", "break", "do", "else", "elseif", "end", "false", "for", "function",
"if", "in", "local", "nil", "not", "or", "repeat", "return", "then", "true",
"until", "while"
]

View File

@ -74,7 +74,6 @@
multiline='true'
line-numbers='true'
oncontrolkey='on_control_combination'
syntax='lua'
size-func="-1,40"
text-wrap='false'
scroll-step='50'

View File

@ -197,6 +197,7 @@ function open_file_in_editor(filename, line, mutable)
editor.scroll = 0
editor.text = source
editor.focused = true
editor.syntax = file.ext(filename)
if line then
time.post_runnable(function()
editor.caret = editor:linePos(line)

View File

@ -166,6 +166,9 @@ size_t BasicParser<CharT>::remain() const {
template<typename CharT>
bool BasicParser<CharT>::isNext(const std::basic_string<CharT>& substring) {
if (substring.empty()) {
return false;
}
if (source.length() - pos < substring.length()) {
return false;
}

View File

@ -2,26 +2,73 @@
#include <set>
#include "data/dv.hpp"
#include "util/stringutil.hpp"
#include "BasicParser.hpp"
using namespace devtools;
static std::set<std::wstring_view> keywords {
L"and", L"break", L"do", L"else", L"elseif", L"end", L"false", L"for", L"function",
L"if", L"in", L"local", L"nil", L"not", L"or", L"repeat", L"return", L"then", L"true",
L"until", L"while"
};
dv::value Syntax::serialize() const {
auto map = dv::object();
map["language"] = language;
map["line-comment"] = util::wstr2str_utf8(lineComment);
map["multiline-comment-start"] = util::wstr2str_utf8(multilineCommentStart);
map["multiline-comment-end"] = util::wstr2str_utf8(multilineCommentEnd);
map["multiline-string-start"] = util::wstr2str_utf8(multilineStringStart);
map["multiline-string-end"] = util::wstr2str_utf8(multilineStringEnd);
static bool is_lua_keyword(std::wstring_view view) {
return keywords.find(view) != keywords.end();
auto& extsList = map.list("extensions");
for (const auto& ext : extensions) {
extsList.add(ext);
}
auto& keywordsList = map.list("keywords");
for (const auto& keyword : keywords) {
keywordsList.add(util::wstr2str_utf8(keyword));
}
return map;
}
inline bool is_lua_identifier_start(int c) {
/// Fills this syntax description from a parsed configuration object.
/// The five delimiter settings are read as UTF-8 strings and stored widened;
/// "extensions" and "keywords" are optional lists whose items are added to the
/// sets this object already holds.
void Syntax::deserialize(const dv::value& src) {
    src.at("language").get(language);

    // Stage 1: fetch every delimiter from the config as UTF-8.
    std::string utf8LineComment;
    std::string utf8CommentOpen;
    std::string utf8CommentClose;
    std::string utf8StringOpen;
    std::string utf8StringClose;
    src.at("line-comment").get(utf8LineComment);
    src.at("multiline-comment-start").get(utf8CommentOpen);
    src.at("multiline-comment-end").get(utf8CommentClose);
    src.at("multiline-string-start").get(utf8StringOpen);
    src.at("multiline-string-end").get(utf8StringClose);

    // Stage 2: widen the fetched values and commit them to the members.
    lineComment = util::str2wstr_utf8(utf8LineComment);
    multilineCommentStart = util::str2wstr_utf8(utf8CommentOpen);
    multilineCommentEnd = util::str2wstr_utf8(utf8CommentClose);
    multilineStringStart = util::str2wstr_utf8(utf8StringOpen);
    multilineStringEnd = util::str2wstr_utf8(utf8StringClose);

    if (src.has("extensions")) {
        for (const auto& item : src["extensions"]) {
            extensions.insert(item.asString());
        }
    }
    if (src.has("keywords")) {
        for (const auto& item : src["keywords"]) {
            keywords.insert(util::str2wstr_utf8(item.asString()));
        }
    }
}
/// Tells whether `c` may begin an identifier: an ASCII letter or an underscore.
inline bool is_common_identifier_start(int c) {
    if (c == '_') {
        return true;
    }
    const bool upper = 'A' <= c && c <= 'Z';
    const bool lower = 'a' <= c && c <= 'z';
    return upper || lower;
}
inline bool is_lua_identifier_part(int c) {
return is_lua_identifier_start(c) || is_digit(c);
/// Tells whether `c` may appear after the first character of an identifier:
/// anything accepted as an identifier start, or a digit.
inline bool is_common_identifier_part(int c) {
    if (is_common_identifier_start(c)) {
        return true;
    }
    return is_digit(c);
}
inline bool is_lua_operator_start(int c) {
@ -31,10 +78,13 @@ inline bool is_lua_operator_start(int c) {
}
class Tokenizer : BasicParser<wchar_t> {
const Syntax& syntax;
std::vector<Token> tokens;
public:
Tokenizer(std::string_view file, std::wstring_view source)
: BasicParser(file, source) {
Tokenizer(
const Syntax& syntax, std::string_view file, std::wstring_view source
)
: BasicParser(file, source), syntax(syntax) {
}
std::wstring parseLuaName() {
@ -110,9 +160,12 @@ public:
}
wchar_t c = peek();
auto start = currentLocation();
if (is_lua_identifier_start(c)) {
if (is_common_identifier_start(c)) {
auto name = parseLuaName();
TokenTag tag = (is_lua_keyword(name) ? TokenTag::KEYWORD : TokenTag::NAME);
TokenTag tag =
(syntax.keywords.find(name) == syntax.keywords.end()
? TokenTag::NAME
: TokenTag::KEYWORD);
emitToken(
tag,
std::move(name),
@ -132,24 +185,29 @@ public:
emitToken(tag, std::wstring(literal), start);
continue;
}
const auto& mcommentStart = syntax.multilineCommentStart;
if (!mcommentStart.empty() && c == mcommentStart[0] &&
isNext(syntax.multilineCommentStart)) {
auto string = readUntil(syntax.multilineCommentEnd, true);
skip(syntax.multilineCommentEnd.length());
emitToken(
TokenTag::COMMENT,
std::wstring(string) + syntax.multilineCommentEnd,
start
);
continue;
}
const auto& mstringStart = syntax.multilineStringStart;
if (!mstringStart.empty() && c == mstringStart[0] &&
isNext(syntax.multilineStringStart)) {
skip(mstringStart.length());
auto string = readUntil(syntax.multilineStringEnd, true);
skip(syntax.multilineStringEnd.length());
emitToken(TokenTag::STRING, std::wstring(string), start);
continue;
}
switch (c) {
case '(': case '[': case '{':
if (isNext(L"[==[")) {
auto string = readUntil(L"]==]", true);
skip(4);
emitToken(
TokenTag::COMMENT,
std::wstring(string) + L"]==]",
start
);
continue;
} else if (isNext(L"[[")) {
skip(2);
auto string = readUntil(L"]]", true);
skip(2);
emitToken(TokenTag::STRING, std::wstring(string), start);
continue;
}
emitToken(TokenTag::OPEN_BRACKET, std::wstring({c}), start, true);
continue;
case ')': case ']': case '}':
@ -188,7 +246,7 @@ public:
};
std::vector<Token> devtools::tokenize(
std::string_view file, std::wstring_view source
const Syntax& syntax, std::string_view file, std::wstring_view source
) {
return Tokenizer(file, source).tokenize();
return Tokenizer(syntax, file, source).tokenize();
}

View File

@ -1,12 +1,28 @@
#pragma once
#include <set>
#include <string>
#include <vector>
#include "devtools/syntax.hpp"
#include "interfaces/Serializable.hpp"
namespace devtools {
/// Lexical description of one language, loaded from a config object and
/// consumed by the tokenizer.
struct Syntax : Serializable {
/// Value of the "language" config key.
std::string language;
/// File extensions associated with this language.
std::set<std::string> extensions;
/// Identifiers the tokenizer tags as keywords instead of plain names.
std::set<std::wstring> keywords;
/// Value of the "line-comment" key.
/// NOTE(review): presumably the single-line comment prefix — confirm in the tokenizer.
std::wstring lineComment;
/// Markers delimiting a span the tokenizer emits as one comment token.
std::wstring multilineCommentStart;
std::wstring multilineCommentEnd;
/// Markers delimiting a span the tokenizer emits as one string token.
std::wstring multilineStringStart;
std::wstring multilineStringEnd;
/// Writes all fields into a new object; wide strings are stored as UTF-8.
dv::value serialize() const override;
/// Reads the fields back from `src`; list items are added to the existing sets.
void deserialize(const dv::value& src) override;
};
std::vector<Token> tokenize(
std::string_view file, std::wstring_view source
const Syntax& syntax, std::string_view file, std::wstring_view source
);
}

29
src/devtools/Editor.cpp Normal file
View File

@ -0,0 +1,29 @@
#include "Editor.hpp"
#include "engine/Engine.hpp"
#include "io/engine_paths.hpp"
#include "coders/syntax_parser.hpp"
#include "SyntaxProcessor.hpp"
using namespace devtools;
/// Stores a reference to `engine` and creates a syntax processor with no
/// registered syntaxes; they are added later by loadTools().
Editor::Editor(Engine& engine)
: engine(engine), syntaxProcessor(std::make_unique<SyntaxProcessor>()) {
}
// Defined in this translation unit, where SyntaxProcessor.hpp is included.
Editor::~Editor() = default;
void Editor::loadTools() {
const auto& paths = engine.getResPaths();
auto files = paths.listdir("devtools/syntax");
for (const auto& file : files) {
auto config = io::read_object(file);
auto syntax = std::make_unique<Syntax>();
syntax->deserialize(config);
syntaxProcessor->addSyntax(std::move(syntax));
}
}
/// Returns the syntax processor owned by this editor (created in the
/// constructor).
SyntaxProcessor& Editor::getSyntaxProcessor() {
return *syntaxProcessor;
}

23
src/devtools/Editor.hpp Normal file
View File

@ -0,0 +1,23 @@
#pragma once
#include <string>
#include <memory>
class Engine;

namespace devtools {
    class SyntaxProcessor;

    /// Developer-tools state attached to an engine; currently it owns the
    /// syntax processor used for source highlighting.
    class Editor {
    public:
        /// @param engine engine whose resource paths loadTools() reads; it is
        ///        stored by reference.
        /// Explicit, so an Engine is never implicitly converted to an Editor.
        explicit Editor(Engine& engine);

        /// Declared here and defined out of line because SyntaxProcessor is
        /// only forward-declared in this header.
        ~Editor();

        /// Loads the syntax descriptions found under "devtools/syntax" and
        /// registers them with the syntax processor.
        void loadTools();

        /// @return the syntax processor owned by this editor.
        SyntaxProcessor& getSyntaxProcessor();
    private:
        Engine& engine;
        std::unique_ptr<SyntaxProcessor> syntaxProcessor;
    };
}

View File

@ -1,4 +1,4 @@
#include "syntax_highlighting.hpp"
#include "SyntaxProcessor.hpp"
#include "coders/commons.hpp"
#include "coders/syntax_parser.hpp"
@ -55,16 +55,28 @@ static std::unique_ptr<FontStylesScheme> build_styles(
return std::make_unique<FontStylesScheme>(std::move(styles));
}
std::unique_ptr<FontStylesScheme> devtools::syntax_highlight(
const std::string& lang, std::wstring_view source
void SyntaxProcessor::addSyntax(
std::unique_ptr<Syntax> syntax
) {
const auto ptr = syntax.get();
langs.emplace_back(std::move(syntax));
for (auto& ext : ptr->extensions) {
langsExtensions[ext] = ptr;
}
}
std::unique_ptr<FontStylesScheme> SyntaxProcessor::highlight(
const std::string& ext, std::wstring_view source
) const {
const auto& found = langsExtensions.find(ext);
if (found == langsExtensions.end()) {
return nullptr;
}
const auto& syntax = *found->second;
try {
if (lang == "lua") {
auto tokens = tokenize("<string>", source);
return build_styles(tokens);
} else {
return nullptr;
}
auto tokens = tokenize(syntax, "<string>", source);
return build_styles(tokens);
} catch (const parsing_error& err) {
return nullptr;
}

View File

@ -0,0 +1,29 @@
#pragma once
#include <set>
#include <string>
#include <memory>
#include <vector>
#include <unordered_map>
struct FontStylesScheme;
namespace devtools {
struct Syntax;
// Style categories for highlighted text.
// NOTE(review): how each value maps to a font style is decided outside this
// header — confirm against build_styles.
enum SyntaxStyles {
DEFAULT, KEYWORD, LITERAL, COMMENT, ERROR
};
// Registry of syntax descriptions, looked up by file extension.
class SyntaxProcessor {
public:
// Tokenizes `source` with the syntax registered for `ext` and builds a styles
// scheme from the tokens. Returns nullptr when no syntax is registered for
// `ext` or when tokenizing raises a parsing_error.
std::unique_ptr<FontStylesScheme> highlight(
const std::string& ext, std::wstring_view source
) const;
// Takes ownership of `syntax` and maps each of its extensions to it.
void addSyntax(std::unique_ptr<Syntax> syntax);
private:
// Owning storage for every registered syntax.
std::vector<std::unique_ptr<Syntax>> langs;
// Non-owning lookup: file extension -> entry of `langs`.
std::unordered_map<std::string, const Syntax*> langsExtensions;
};
}

View File

@ -1,16 +0,0 @@
#pragma once
#include <string>
#include <memory>
struct FontStylesScheme;
namespace devtools {
enum SyntaxStyles {
DEFAULT, KEYWORD, LITERAL, COMMENT, ERROR
};
std::unique_ptr<FontStylesScheme> syntax_highlight(
const std::string& lang, std::wstring_view source
);
}

View File

@ -12,6 +12,7 @@
#include "coders/json.hpp"
#include "coders/toml.hpp"
#include "coders/commons.hpp"
#include "devtools/Editor.hpp"
#include "content/ContentControl.hpp"
#include "core_defs.hpp"
#include "io/io.hpp"
@ -73,6 +74,7 @@ Engine& Engine::getInstance() {
void Engine::initialize(CoreParameters coreParameters) {
params = std::move(coreParameters);
settingsHandler = std::make_unique<SettingsHandler>(settings);
editor = std::make_unique<devtools::Editor>(*this);
cmd = std::make_unique<cmd::CommandsInterpreter>();
network = network::Network::create(settings.network);
@ -134,6 +136,7 @@ void Engine::initialize(CoreParameters coreParameters) {
);
}
content = std::make_unique<ContentControl>(paths, *input, [this]() {
editor->loadTools();
langs::setup(langs::get_current(), paths.resPaths.collectRoots());
if (!isHeadless()) {
for (auto& pack : content->getAllContentPacks()) {

View File

@ -33,6 +33,10 @@ namespace network {
class Network;
}
namespace devtools {
class Editor;
}
class initialize_error : public std::runtime_error {
public:
initialize_error(const std::string& message) : std::runtime_error(message) {}
@ -63,6 +67,7 @@ class Engine : public util::ObjectsKeeper {
std::unique_ptr<Window> window;
std::unique_ptr<Input> input;
std::unique_ptr<gui::GUI> gui;
std::unique_ptr<devtools::Editor> editor;
PostRunnables postRunnables;
Time time;
OnWorldOpen levelConsumer;
@ -161,4 +166,8 @@ public:
cmd::CommandsInterpreter& getCmd() {
return *cmd;
}
/// Returns the editor created in initialize(); the pointer is dereferenced
/// without a null check.
devtools::Editor& getEditor() {
return *editor;
}
};

View File

@ -364,3 +364,7 @@ Input& GUI::getInput() {
Window& GUI::getWindow() {
return engine.getWindow();
}
/// Forwards to the engine's editor.
devtools::Editor& GUI::getEditor() {
return engine.getEditor();
}

View File

@ -18,6 +18,10 @@ class Engine;
class Input;
class Window;
namespace devtools {
class Editor;
}
/*
Some info about padding and margin.
Padding is element inner space, margin is outer
@ -159,5 +163,6 @@ namespace gui {
const Input& getInput() const;
Input& getInput();
Window& getWindow();
devtools::Editor& getEditor();
};
}

View File

@ -8,7 +8,8 @@
#include "../markdown.hpp"
#include "Label.hpp"
#include "assets/Assets.hpp"
#include "devtools/syntax_highlighting.hpp"
#include "devtools/Editor.hpp"
#include "devtools/SyntaxProcessor.hpp"
#include "engine/Engine.hpp"
#include "graphics/core/Batch2D.hpp"
#include "graphics/core/DrawContext.hpp"
@ -811,7 +812,8 @@ void TextBox::stepDefaultUp(bool shiftPressed, bool breakSelection) {
void TextBox::refreshSyntax() {
if (!syntax.empty()) {
if (auto styles = devtools::syntax_highlight(syntax, input)) {
const auto& processor = gui.getEditor().getSyntaxProcessor();
if (auto styles = processor.highlight(syntax, input)) {
label->setStyles(std::move(styles));
}
}

View File

@ -1,27 +0,0 @@
#include "coders/syntax_parser.hpp"
#include <gtest/gtest.h>
#include "coders/commons.hpp"
#include "io/io.hpp"
#include "io/devices/StdfsDevice.hpp"
#include "util/stringutil.hpp"
namespace fs = std::filesystem;
// Smoke test: tokenizes res:scripts/stdlib.lua and prints every token's tag
// and quoted text. A parsing_error is logged and then propagated so the test
// fails.
TEST(lua_parsing, Tokenizer) {
    io::set_device("res", std::make_shared<io::StdfsDevice>(fs::u8path("../../res")));
    auto filename = "res:scripts/stdlib.lua";
    auto source = io::read_string(filename);
    try {
        auto tokens = devtools::tokenize(filename, util::str2wstr_utf8(source));
        for (const auto& token : tokens) {
            std::cout << (int)token.tag << " "
                      << util::quote(util::wstr2str_utf8(token.text))
                      << std::endl;
        }
    } catch (const parsing_error& err) {
        std::cerr << err.errorLog() << std::endl;
        // Rethrow the in-flight exception itself: `throw err;` would copy it
        // and slice any type derived from parsing_error.
        throw;
    }
}