Merge pull request #329 from MihailRis/add-utf8-lib
Add Lua utf8 library
This commit is contained in:
commit
8a594c41b4
@ -21,6 +21,7 @@ Subsections:
|
||||
- [player](scripting/builtins/libplayer.md)
|
||||
- [quat](scripting/builtins/libquat.md)
|
||||
- [time](scripting/builtins/libtime.md)
|
||||
- [utf8](scripting/builtins/libutf8.md)
|
||||
- [vec2, vec3, vec4](scripting/builtins/libvecn.md)
|
||||
- [world](scripting/builtins/libworld.md)
|
||||
- [Module core:bit_converter](scripting/modules/core_bit_converter.md)
|
||||
|
||||
21
doc/en/scripting/builtins/libutf8.md
Normal file
21
doc/en/scripting/builtins/libutf8.md
Normal file
@ -0,0 +1,21 @@
|
||||
# *utf8* library
|
||||
|
||||
The library provides functions for working with UTF-8.
|
||||
|
||||
```lua
|
||||
-- Converts a UTF-8 string to a Bytearray or an array of numbers if
|
||||
-- the second argument is true
|
||||
utf8.tobytes(text: str, [optional] usetable=false) -> Bytearray|table
|
||||
|
||||
-- Converts a Bytearray or an array of numbers to a UTF-8 string
|
||||
utf8.tostring(bytes: Bytearray|table) -> str
|
||||
|
||||
-- Returns the length of a UTF-8 string in characters (code points), not bytes
|
||||
utf8.length(text: str) -> int
|
||||
|
||||
-- Returns the Unicode code point of the first character of the string (0 if the string is empty)
|
||||
utf8.codepoint(chars: str) -> int
|
||||
|
||||
-- Returns a substring from position startchar to endchar inclusive
|
||||
utf8.sub(text: str, startchar: int, [optional] endchar: int) -> str
|
||||
```
|
||||
@ -21,6 +21,7 @@
|
||||
- [player](scripting/builtins/libplayer.md)
|
||||
- [quat](scripting/builtins/libquat.md)
|
||||
- [time](scripting/builtins/libtime.md)
|
||||
- [utf8](scripting/builtins/libutf8.md)
|
||||
- [vec2, vec3, vec4](scripting/builtins/libvecn.md)
|
||||
- [world](scripting/builtins/libworld.md)
|
||||
- [Модуль core:bit_converter](scripting/modules/core_bit_converter.md)
|
||||
|
||||
21
doc/ru/scripting/builtins/libutf8.md
Normal file
21
doc/ru/scripting/builtins/libutf8.md
Normal file
@ -0,0 +1,21 @@
|
||||
# Библиотека *utf8*
|
||||
|
||||
Библиотека предоставляет функции для работы с UTF-8.
|
||||
|
||||
```lua
|
||||
-- Конвертирует UTF-8 строку в Bytearray или массив чисел если
|
||||
-- второй аргумент - true
|
||||
utf8.tobytes(text: str, [опционально] usetable=false) -> Bytearray|table
|
||||
|
||||
-- Конвертирует Bytearray или массив чисел в UTF-8 строку
|
||||
utf8.tostring(bytes: Bytearray|table) -> str
|
||||
|
||||
-- Возвращает длину юникод-строки
|
||||
utf8.length(text: str) -> int
|
||||
|
||||
-- Возвращает код первого символа строки
|
||||
utf8.codepoint(chars: str) -> int
|
||||
|
||||
-- Возвращает подстроку от позиции startchar до endchar включительно
|
||||
utf8.sub(text: str, startchar: int, [опционально] endchar: int) -> str
|
||||
```
|
||||
@ -35,6 +35,7 @@ extern const luaL_Reg playerlib[];
|
||||
extern const luaL_Reg quatlib[]; // quat.cpp
|
||||
extern const luaL_Reg timelib[];
|
||||
extern const luaL_Reg tomllib[];
|
||||
extern const luaL_Reg utf8lib[];
|
||||
extern const luaL_Reg vec2lib[]; // vecn.cpp
|
||||
extern const luaL_Reg vec3lib[]; // vecn.cpp
|
||||
extern const luaL_Reg vec4lib[]; // vecn.cpp
|
||||
|
||||
73
src/logic/scripting/lua/libs/libutf8.cpp
Normal file
73
src/logic/scripting/lua/libs/libutf8.cpp
Normal file
@ -0,0 +1,73 @@
|
||||
#include "api_lua.hpp"
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "../lua_custom_types.hpp"
|
||||
#include "util/stringutil.hpp"
|
||||
|
||||
/// @brief utf8.tobytes(text: str, [optional] usetable=false) -> Bytearray|table
/// Exposes the raw bytes of the string passed as argument 1.
/// @return 1 (a Bytearray userdata, or a table of byte values when
/// argument 2 is truthy)
static int l_encode(lua::State* L) {
    std::string_view text = lua::require_string(L, 1);

    if (!lua::toboolean(L, 2)) {
        // Default mode: copy the bytes into a new Bytearray userdata.
        lua::newuserdata<lua::LuaBytearray>(L, text.length());
        auto target = lua::touserdata<lua::LuaBytearray>(L, -1);
        target->data().reserve(text.length());
        std::memcpy(target->data().data(), text.data(), text.length());
        return 1;
    }

    // Table mode: one integer in [0, 255] per byte, stored at 1-based keys.
    lua::createtable(L, text.length(), 0);
    for (size_t index = 0; index < text.length(); index++) {
        const auto byteValue = text[index] & 0xFF;
        lua::pushinteger(L, byteValue);
        lua::rawseti(L, index + 1);
    }
    return 1;
}
|
||||
|
||||
/// @brief utf8.tostring(bytes: Bytearray|table) -> str
/// Builds a Lua string from raw bytes supplied either as a table of numbers
/// or as a Bytearray userdata (argument 1).
/// @return number of values left on the Lua stack for the caller
static int l_decode(lua::State* L) {
    if (lua::istable(L, 1)) {
        const size_t size = lua::objlen(L, 1);
        std::string bytes(size, '\0');
        // The byte values live inside the Lua table, so each element has to
        // be fetched and narrowed to a char before the string is pushed.
        for (size_t i = 0; i < size; i++) {
            lua::rawgeti(L, i + 1, 1);
            bytes[i] = static_cast<char>(lua::tointeger(L, -1));
            lua::pop(L);
        }
        return lua::pushstring(L, bytes);
    } else if (auto bytes = lua::touserdata<lua::LuaBytearray>(L, 1)) {
        return lua::pushstring(
            L,
            std::string(
                reinterpret_cast<char*>(bytes->data().data()),
                bytes->data().size()
            )
        );
    }
    // NOTE(review): argument 1 is neither a table nor a Bytearray; nothing is
    // pushed here although one result is reported - consider raising an error.
    return 1;
}
|
||||
|
||||
/// @brief utf8.length(text: str) -> int
/// Pushes the result of util::length_utf8 for the string in argument 1.
static int l_length(lua::State* L) {
    const auto text = lua::require_string(L, 1);
    const auto count = util::length_utf8(text);
    return lua::pushinteger(L, count);
}
|
||||
|
||||
/// @brief utf8.codepoint(chars: str) -> int
/// Pushes the value decoded by util::decode_utf8 from the start of the
/// string in argument 1, or 0 when that string is empty.
static int l_codepoint(lua::State* L) {
    std::string_view text = lua::require_string(L, 1);
    if (!text.empty()) {
        // decode_utf8 takes a size out-parameter; its value is not needed here.
        uint consumed;
        return lua::pushinteger(L, util::decode_utf8(consumed, text.data()));
    }
    return lua::pushinteger(L, 0);
}
|
||||
|
||||
/// @brief utf8.sub(text: str, startchar: int, [optional] endchar: int) -> str
/// Pushes the characters of the string in argument 1 from position startchar
/// to endchar inclusive. Positions are 1-based and counted in characters,
/// not bytes. When endchar is omitted the substring runs to the end of the
/// string. Out-of-range positions are clamped; an empty range yields "".
static int l_sub(lua::State* L) {
    auto string = util::str2u32str_utf8(lua::require_string(L, 1));
    const int length = static_cast<int>(string.length());

    // Convert to 0-based indices; `last` is the inclusive end index.
    const int first = std::max(0, static_cast<int>(lua::tointeger(L, 2) - 1));
    int last = length - 1;
    if (lua::gettop(L) >= 3) {
        last = std::min(last, static_cast<int>(lua::tointeger(L, 3) - 1));
    }

    // substr() takes (position, count), not (position, end), and throws when
    // the position is past the end, so both values are normalized first.
    const bool hasRange = first < length && last >= first;
    const size_t pos = static_cast<size_t>(std::min(first, length));
    const size_t count = hasRange ? static_cast<size_t>(last - first + 1) : 0;
    return lua::pushstring(L, util::u32str2str_utf8(string.substr(pos, count)));
}
|
||||
|
||||
/// Registration table of the `utf8` Lua library: Lua-visible function names
/// paired with their wrapped native handlers.
const luaL_Reg utf8lib[] = {
    // Byte-level conversions
    {"tobytes", lua::wrap<l_encode>},
    {"tostring", lua::wrap<l_decode>},
    // Character-level operations
    {"length", lua::wrap<l_length>},
    {"codepoint", lua::wrap<l_codepoint>},
    {"sub", lua::wrap<l_sub>},
    // Terminating entry
    {nullptr, nullptr}
};
|
||||
@ -51,6 +51,7 @@ static void create_libs(State* L, StateType stateType) {
|
||||
openlib(L, "quat", quatlib);
|
||||
openlib(L, "time", timelib);
|
||||
openlib(L, "toml", tomllib);
|
||||
openlib(L, "utf8", utf8lib);
|
||||
openlib(L, "vec2", vec2lib);
|
||||
openlib(L, "vec3", vec3lib);
|
||||
openlib(L, "vec4", vec4lib);
|
||||
|
||||
@ -128,7 +128,7 @@ inline uint utf8_len(ubyte cp) {
|
||||
if ((cp & 0xF8) == 0xF0) {
|
||||
return 4;
|
||||
}
|
||||
return 0;
|
||||
throw std::runtime_error("utf8 decode error");
|
||||
}
|
||||
|
||||
uint32_t util::decode_utf8(uint& size, const char* chr) {
|
||||
@ -156,6 +156,16 @@ size_t util::crop_utf8(std::string_view s, size_t maxSize) {
|
||||
return pos;
|
||||
}
|
||||
|
||||
size_t util::length_utf8(std::string_view s) {
|
||||
size_t length = 0;
|
||||
size_t pos = 0;
|
||||
while (pos < s.length()) {
|
||||
pos += utf8_len(s[pos]);
|
||||
length++;
|
||||
}
|
||||
return length;
|
||||
}
|
||||
|
||||
template<class C>
|
||||
std::string xstr2str_utf8(const std::basic_string<C>& xs) {
|
||||
std::vector<char> chars;
|
||||
|
||||
@ -44,6 +44,11 @@ namespace util {
|
||||
/// @param maxSize max encoded string length after crop
|
||||
/// @return cropped string size (less or equal to maxSize)
|
||||
size_t crop_utf8(std::string_view s, size_t maxSize);
|
||||
|
||||
/// @brief Measure utf8-encoded string length
|
||||
/// @param s source encoded string
|
||||
/// @return unicode string length (number of codepoints)
|
||||
size_t length_utf8(std::string_view s);
|
||||
|
||||
bool is_integer(const std::string& text);
|
||||
bool is_integer(const std::wstring& text);
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user