228 lines
4.9 KiB
Lua
228 lines
4.9 KiB
Lua
local util = require "core:bitwise/util"
|
|
|
|
-- Character classes consulted by the tokenizer. Each one is a plain string that
-- is searched for a single character.

-- Every character that starts (or is) an operator or a bracket.
local operators = "><|&~^()"

-- Characters that may begin a numeric literal.
local startingDigits = "0123456789"

-- Every digit character in ascending value order; the 1-based position of a
-- digit in this string is its value plus one.
local digits = startingDigits.."ABCDEF"

-- Characters allowed inside an identifier.
local idChars = "qwertyuiopasdfghjklzxcvbnm".."QWERTYUIOPASDFGHJKLZXCVBNM"..startingDigits.."_"

-- Characters skipped as whitespace.
local spaces = " \t"
|
|
|
|
-- Ordered (pattern, replacement) pairs applied by esc. The '%' rule must run
-- first so that the '%' characters inserted by later rules are not doubled.
local escRules = {
  { '%%',  '%%%%' },
  { '^%^', '%%^'  }, -- '^' is only magic at the start of a pattern
  { '%$$', '%%$'  }, -- '$' is only magic at the end of a pattern
  { '%(',  '%%('  },
  { '%)',  '%%)'  },
  { '%.',  '%%.'  },
  { '%[',  '%%['  },
  { '%]',  '%%]'  },
  { '%*',  '%%*'  },
  { '%+',  '%%+'  },
  { '%-',  '%%-'  },
  { '%?',  '%%?'  },
}

--- Escapes Lua pattern magic characters in a string so that it can be used as
-- a literal pattern.
-- @param x string to escape
-- @return the escaped string (a single value; the gsub counts are dropped)
local function esc(x)
  local escaped = x

  for ruleIndex = 1, #escRules do
    local rule = escRules[ruleIndex]
    -- Assigning to a single variable keeps only the first result of gsub.
    escaped = escaped:gsub(rule[1], rule[2])
  end

  return escaped
end
|
|
|
|
-- Token type names. These strings are stored in the `type` field of tokens
-- produced by the tokenizer.

-- Numeric literal kinds.
local decNum, hexNum, binNum = "DECIMAL", "HEXADECIMAL", "BINARY"

-- Shift operators (written with two characters: '>>' and '<<').
local rshift, lshift = "RIGHT SHIFT", "LEFT SHIFT"

-- Single-character bitwise operators.
local bor, band, bxor, bnot = "OR", "AND", "XOR", "NOT"

-- Identifier.
local id = "ID"

-- Brackets.
local openingBracket, closingBracket = "OPENING BRACKET", "CLOSING BRACKET"
|
|
|
|
-- Maps a single-character operator or bracket to its token type name.
-- The two-character shift operators are not listed here.
local opToType = {
  -- brackets
  ['('] = openingBracket,
  [')'] = closingBracket,

  -- unary operator
  ['~'] = bnot,

  -- binary operators
  ['&'] = band,
  ['|'] = bor,
  ['^'] = bxor,
}
|
|
|
|
-- Maps a token type name back to its single-character operator or bracket.
-- Built as the exact inverse of opToType, so it holds the same six entries:
-- XOR, OR, AND, OPENING BRACKET, CLOSING BRACKET and NOT.
local typeToOp = {}

for opChar, tokenType in pairs(opToType) do
  typeToOp[tokenType] = opChar
end
|
|
|
|
--- Tells whether `char` occurs in `str`.
-- The search is a plain substring search (no pattern matching), so magic
-- characters such as '^' or '(' need no escaping.
-- An empty `char` is never considered contained: callers pass the lookahead
-- character, which is "" at the end of the input, and string.find would
-- otherwise report a match at position 1 for it.
-- @param str  string to search in
-- @param char character (or substring) to look for
-- @return true if `char` is non-empty and occurs in `str`, false otherwise
local function contains(str, char)
  return char ~= "" and string.find(str, char, 1, true) ~= nil
end
|
|
|
|
--- Verifies that a digit is valid for the given number base.
-- `digits` lists digit characters in ascending value order, so a digit is
-- valid when its 1-based position in that string does not exceed `base`.
-- The lookup is a plain search: characters such as '(' or '%' would otherwise
-- be interpreted as a (malformed) Lua pattern. An empty digit is rejected,
-- because string.find reports a match at position 1 for an empty needle.
-- @param column position passed on to util.throw when the digit is invalid
-- @param digit  the digit character to check (upper case for letters)
-- @param base   the number base (2, 10 or 16 in this file)
-- Reports invalid digits through util.throw.
local function checkDigitSystem(column, digit, base)
  local position = digit ~= "" and digits:find(digit, 1, true)

  if not position or position > base then
    util.throw(column, "the digit '"..digit.."' does not belong to the "..base.."-based number system")
  end
end
|
|
|
|
--- Decides whether the number or identifier being read ends at position `i`.
-- @param i    index of the current character
-- @param set  characters that may continue the current token
-- @param next the character following position `i`
-- @param len  length of the whole input
-- @return false when `next` continues the token; true when the token ends
--         here (end of input, or `next` is an operator or a space)
-- Calls util.throw when `next` neither continues the token nor is an operator
-- or a space.
local function isEndOfNumOrId(i, set, next, len)
  local atLastChar = i == len

  -- The token goes on: there is a following character and it belongs to the set.
  if not atLastChar and contains(set, next) then
    return false
  end

  -- The token stops cleanly at the end of input or before an operator/space.
  if atLastChar or contains(operators, next) or contains(spaces, next) then
    return true
  end

  util.throw(i + 1, "operator or space expected")
end
|
|
|
|
--- Splits an expression string into a list of tokens.
--
-- Produced tokens are tables of the form:
--   * operators and brackets: { column = <index>, type = <type name> }
--   * numbers and identifiers: { column = <index>, type = <type name>, value = <text> }
-- `column` is the value of the scan index when the token is emitted: the
-- second character of '>>' / '<<', and the last character of a number or
-- identifier. The value of a hexadecimal number is upper-cased, and the
-- '0x' / '0b' prefix is not part of the value.
--
-- Errors are reported through util.throw(column, message).
-- NOTE(review): this code assumes util.throw raises and does not return -
-- confirm against core:bitwise/util.
--
-- @param str the expression to tokenize
-- @return array of tokens in source order
local function tokenize(str)
  local tokens = { }

  local buffer = ""            -- text of the number/identifier being read
  local numType, numSys        -- type name and base of the number being read
  local readingNum, readingId

  local i = 1

  while i <= #str do
    local char = str:sub(i, i)
    local upChar = string.upper(char)
    local nextChar = str:sub(i + 1, i + 1) -- "" at the end of the input

    if contains(operators, char) then
      local opType

      if char == '>' or char == '<' then
        -- Shifts are the only two-character operators: '>>' and '<<'.
        if nextChar == char then
          opType = char == '>' and rshift or lshift
          i = i + 1
        else
          util.throw(i, "invalid operator")
        end
      elseif char == '~' then
        -- NOT must be directly followed by a number or identifier character.
        -- The explicit emptiness check rejects a trailing '~' at end of input.
        if nextChar ~= "" and contains(idChars, nextChar) then
          opType = bnot
        else
          util.throw(i, "number expected")
        end
      else
        opType = opToType[char]
      end

      table.insert(tokens, { column = i, type = opType })
    elseif ((not readingNum and contains(startingDigits, char)) or (readingNum and contains(digits, upChar))) and not readingId then
      if not readingNum then
        readingNum = true

        if char == '0' and (nextChar == 'x' or nextChar == 'b') then
          numType = nextChar == 'x' and hexNum or binNum
          numSys = nextChar == 'x' and 16 or 2

          -- Skip the two-character prefix.
          i = i + 2

          -- str:sub never returns nil, so the end of input has to be detected
          -- by comparing the index with the length.
          if i > #str then
            util.throw(i, "unexpected end")
          end

          char = str:sub(i, i)
          upChar = string.upper(char)
          nextChar = str:sub(i + 1, i + 1)
        else
          numType = decNum
          numSys = 10
        end
      end

      -- Hexadecimal digits are handled case-insensitively.
      if numType == hexNum then
        char, nextChar = upChar, string.upper(nextChar)
      end

      checkDigitSystem(i, char, numSys)

      buffer = buffer..char

      if isEndOfNumOrId(i, digits, nextChar, #str) then
        readingNum = false
        table.insert(tokens, { column = i, type = numType, value = buffer })
        buffer = ""
      end
    elseif contains(idChars, char) then
      readingId = true

      buffer = buffer..char

      if isEndOfNumOrId(i, idChars, nextChar, #str) then
        readingId = false
        table.insert(tokens, { column = i, type = id, value = buffer })
        buffer = ""
      end
    elseif not spaces:find(char, 1, true) then
      -- Anything that is not an operator, digit, identifier character or
      -- whitespace is rejected.
      util.throw(i, "undefined token: \""..char.."\"")
    end

    i = i + 1
  end

  return tokens
end
|
|
|
|
--- Prints every token of a list on its own line as "{ key = value, ... }".
-- Fields are visited with pairs, so their order within a line is unspecified.
-- @param tokens array of token tables whose keys and values can be concatenated
local function printTokens(tokens)
  for _, token in ipairs(tokens) do
    local fields = { }

    for key, value in pairs(token) do
      fields[#fields + 1] = key..' = '..value..', '
    end

    local line = "{ "..table.concat(fields)

    -- Drop the last two characters (the trailing ", ") before closing the brace.
    print(line:sub(1, #line - 2).." }")
  end
end
|
|
|
|
-- Token type names, keyed by the identifier used for them in this file.
local tokenTypes = {
  -- numbers
  decNum = decNum,
  hexNum = hexNum,
  binNum = binNum,

  -- operators
  rshift = rshift,
  lshift = lshift,
  bor = bor,
  band = band,
  bxor = bxor,
  bnot = bnot,

  -- identifiers and brackets
  id = id,
  openingBracket = openingBracket,
  closingBracket = closingBracket
}

-- Type names of all operator tokens and of all numeric tokens.
local operatorTypes = { lshift, rshift, bnot, band, bxor, bor }
local numTypes = { decNum, hexNum, binNum }

-- Public interface of the module.
return {
  -- character classes
  operators = operators,
  digits = digits,
  startingDigits = startingDigits,
  idChars = idChars,

  -- token type groups and names
  operatorTypes = operatorTypes,
  numTypes = numTypes,
  types = tokenTypes,

  -- operator <-> type name lookups
  opToType = opToType,
  typeToOp = typeToOp,

  -- functions
  get_tokens = tokenize,
  print_tokens = printTokens
}