VoxelEngine/res/modules/bitwise/tokenizer.lua
2025-04-06 22:16:04 +09:00

228 lines
4.9 KiB
Lua

local util = require "core:bitwise/util"
-- Characters that start (or fully form) an operator or a bracket token.
local operators = "><|&~^()"
-- Digit alphabet in ascending value order; checkDigitSystem relies on the
-- position of a digit in this string to validate it against a base.
local digits = "0123456789ABCDEF"
-- Characters that may begin a number literal.
local startingDigits = "0123456789"
-- Characters that may appear in an identifier.
local idChars = "qwertyuiopasdfghjklzxcvbnmQWERTYUIOPASDFGHJKLZXCVBNM0123456789_"
-- Characters treated as insignificant whitespace between tokens.
local spaces = " \t"
--- Escapes Lua pattern magic characters in `x` so it can be used as a
-- literal inside a pattern.
-- `^` is escaped only when it is the first character and `$` only when it
-- is the last one, since those are the only positions where they act as
-- anchors; every other magic character is escaped wherever it appears.
-- @param x string to escape
-- @return the escaped string (a single value; the gsub counts are dropped)
local function esc(x)
    -- Position-independent magic characters, handled in a single pass.
    local escaped = x:gsub('[%%%(%)%.%[%]%*%+%-%?]', '%%%0')
    -- Anchors: only magic at the very start / very end of a pattern.
    escaped = escaped:gsub('^%^', '%%^')
    escaped = escaped:gsub('%$$', '%%$')
    return escaped
end
-- Token type names. Number literals:
local decNum, hexNum, binNum = "DECIMAL", "HEXADECIMAL", "BINARY"
-- Two-character shift operators:
local rshift, lshift = "RIGHT SHIFT", "LEFT SHIFT"
-- Single-character bitwise operators:
local bor, band, bxor, bnot = "OR", "AND", "XOR", "NOT"
-- Identifiers and brackets:
local id = "ID"
local openingBracket, closingBracket = "OPENING BRACKET", "CLOSING BRACKET"

-- Single-character operator/bracket -> token type.
-- The shift operators are two characters long and are not listed here.
local opToType = {
    ['^'] = bxor,
    ['|'] = bor,
    ['&'] = band,
    ['('] = openingBracket,
    [')'] = closingBracket,
    ['~'] = bnot
}

-- Token type -> single-character operator/bracket: the exact inverse of opToType.
local typeToOp = { }
for op, tokenType in pairs(opToType) do
    typeToOp[tokenType] = op
end
--- Tells whether `char` occurs literally in `str`.
-- The search is a plain-text one, so pattern magic characters in `char`
-- need no escaping. An empty `char` is never considered contained: a pattern
-- search for "" matches at position 1 of any string, which would make the
-- empty look-ahead at the end of the input look like a valid character.
-- @param str  string to search in (a character set such as `operators`)
-- @param char text to look for (a single character at every call site in this file)
-- @return true if `char` is non-empty and found in `str`, false otherwise
local function contains(str, char)
    return char ~= "" and string.find(str, char, 1, true) ~= nil
end
--- Verifies that `digit` is a valid digit of the number system with the given base.
-- A digit is valid when its 1-based position in `digits` does not exceed `base`.
-- On failure the error is reported through util.throw(column, message)
-- (defined in core:bitwise/util; presumably it raises).
-- @param column position in the source string used for error reporting
-- @param digit  single character to validate (upper-cased by the caller for hex)
-- @param base   numeric base, e.g. 2, 10 or 16
local function checkDigitSystem(column, digit, base)
    local index
    -- `digit` comes straight from user input (it is the character following a
    -- 0x/0b prefix), so it must be searched as plain text: as a pattern, '.'
    -- would match any digit, '(' or '%' would raise a malformed-pattern error,
    -- and "" would match at position 1.
    if digit ~= "" then
        index = digits:find(digit, 1, true)
    end
    if not index or index > base then
        util.throw(column, "the digit '"..digit.."' does not belong to the "..base.."-based number system")
    end
end
--- Decides whether the number or identifier being read ends at position `i`.
-- @param i    index of the character that was just consumed
-- @param set  characters allowed to continue the current token
-- @param next the character following position `i`
-- @param len  length of the whole input string
-- @return true when the token ends here (end of input, or `next` is an
--         operator or a space); false when `next` continues the token.
--         Any other `next` is reported through util.throw at column i + 1.
local function isEndOfNumOrId(i, set, next, len)
    local atEnd = i == len
    -- The token goes on: there is more input and the next character fits the set.
    if not atEnd and contains(set, next) then
        return false
    end
    -- A token may only be terminated by the end of input, an operator or a space.
    if atEnd or contains(operators, next) or contains(spaces, next) then
        return true
    end
    util.throw(i + 1, "operator or space expected")
end
--- Splits a bitwise-expression string into a flat list of tokens.
-- Every token is a table with `column` (index in `str` of the token's last
-- character) and `type` (one of the token type names defined in this file).
-- Number and identifier tokens additionally carry their text in `value`;
-- for numbers the 0x/0b prefix is stripped and hex digits are upper-cased.
-- Errors are reported through util.throw(column, message) (defined in
-- core:bitwise/util; presumably it raises).
-- @param str expression to tokenize
-- @return array of token tables in source order
local function tokenize(str)
    local tokens = { }
    local buffer = ""
    local numType, readingNum, readingId
    local numSys
    local len = #str
    local i = 1
    while i <= len do
        local char = str:sub(i, i)
        local upChar = string.upper(char)
        -- Look-ahead character; "" when `char` is the last one.
        local nextChar = str:sub(i + 1, i + 1)
        if contains(operators, char) then
            local tokenType
            if char == '>' or char == '<' then
                -- Shifts are the only two-character operators: ">>" and "<<".
                if nextChar == char then
                    tokenType = char == '>' and rshift or lshift
                    i = i + 1
                else
                    util.throw(i, "invalid operator")
                end
            elseif char == '~' then
                -- The explicit emptiness check keeps a trailing '~' from being
                -- accepted when the look-ahead is the empty string.
                if nextChar ~= "" and contains(idChars, nextChar) then
                    tokenType = bnot
                else
                    util.throw(i, "number expected")
                end
            else
                tokenType = opToType[char]
            end
            table.insert(tokens, { column = i, type = tokenType })
        elseif ((not readingNum and contains(startingDigits, char)) or (readingNum and contains(digits, upChar))) and not readingId then
            if not readingNum then
                readingNum = true
                if char == '0' and (nextChar == 'x' or nextChar == 'b') then
                    numType = nextChar == 'x' and hexNum or binNum
                    numSys = nextChar == 'x' and 16 or 2
                    -- Skip the prefix and re-read the current/next characters.
                    i = i + 2
                    -- str:sub yields "" (never nil) past the end of the string,
                    -- so the end of input has to be detected by index.
                    if i > len then
                        util.throw(i, "unexpected end")
                    end
                    char = str:sub(i, i)
                    upChar = string.upper(char)
                    nextChar = str:sub(i + 1, i + 1)
                else
                    numType = decNum
                    numSys = 10
                end
            end
            if numType == hexNum then
                -- Hex digits are case-insensitive; normalize to match `digits`.
                char, nextChar = upChar, string.upper(nextChar)
            end
            checkDigitSystem(i, char, numSys)
            buffer = buffer..char
            if isEndOfNumOrId(i, digits, nextChar, len) then
                readingNum = false
                table.insert(tokens, { column = i, type = numType, value = buffer })
                buffer = ""
            end
        elseif contains(idChars, char) then
            readingId = true
            buffer = buffer..char
            if isEndOfNumOrId(i, idChars, nextChar, len) then
                readingId = false
                table.insert(tokens, { column = i, type = id, value = buffer })
                buffer = ""
            end
        elseif not spaces:find(char, 1, true) then
            -- Anything that is not an operator, digit, identifier character
            -- or whitespace is rejected.
            util.throw(i, "undefined token: \""..char.."\"")
        end
        i = i + 1
    end
    return tokens
end
--- Prints every token on its own line as "{ key = value, ... }".
-- Fields are listed in pairs() order, which is unspecified.
-- @param tokens array of token tables, as returned by tokenize
local function printTokens(tokens)
    for _, token in ipairs(tokens) do
        local fields = { }
        for key, value in pairs(token) do
            fields[#fields + 1] = key..' = '..value..', '
        end
        local line = "{ "..table.concat(fields)
        -- Drop the last two characters (the trailing ", ") before closing the brace.
        print(line:sub(1, -3).." }")
    end
end
-- Token type names keyed by the identifiers used for them inside this module.
local types = {
    decNum = decNum,
    hexNum = hexNum,
    binNum = binNum,
    rshift = rshift,
    lshift = lshift,
    bor = bor,
    band = band,
    bxor = bxor,
    bnot = bnot,
    id = id,
    openingBracket = openingBracket,
    closingBracket = closingBracket
}

-- Public interface of the tokenizer module.
return {
    -- Character sets used while scanning.
    operators = operators,
    digits = digits,
    startingDigits = startingDigits,
    idChars = idChars,
    -- Groups of token type names.
    operatorTypes = { lshift, rshift, bnot, band, bxor, bor },
    numTypes = { decNum, hexNum, binNum },
    types = types,
    -- Conversions between single-character operators and token types.
    opToType = opToType,
    typeToOp = typeToOp,
    -- Entry points.
    get_tokens = tokenize,
    print_tokens = printTokens
}