LoveFrames/loveframes/third-party/utf8/modifier/compiletime/vanilla.lua
2020-08-04 11:28:04 +01:00

271 lines
7.0 KiB
Lua

return function(utf8)
local utf8unicode = utf8.byte
local sub = utf8.raw.sub
local matchers = {
star = function(class, name)
local class_name = 'class' .. name
return [[
local ]] .. class_name .. [[ = ]] .. class .. [[
add(function(ctx) -- star
-- debug(ctx, 'star', ']] .. class_name .. [[')
local clone = ctx:clone()
while ]] .. class_name .. [[:test(clone:get_charcode()) do
clone:next_char()
end
local pos = clone.pos
while pos >= ctx.pos do
clone.pos = pos
clone.func_pos = ctx.func_pos
clone:next_function()
clone:get_function()(clone)
if clone.modified then
clone = ctx:clone()
end
pos = pos - 1
end
end)
]]
end,
minus = function(class, name)
local class_name = 'class' .. name
return [[
local ]] .. class_name .. [[ = ]] .. class .. [[
add(function(ctx) -- minus
-- debug(ctx, 'minus', ']] .. class_name .. [[')
local clone = ctx:clone()
local pos
repeat
pos = clone.pos
clone:next_function()
clone:get_function()(clone)
if clone.modified then
clone = ctx:clone()
clone.pos = pos
else
clone.pos = pos
clone.func_pos = ctx.func_pos
end
local match = ]] .. class_name .. [[:test(clone:get_charcode())
clone:next_char()
until not match
end)
]]
end,
question = function(class, name)
local class_name = 'class' .. name
return [[
local ]] .. class_name .. [[ = ]] .. class .. [[
add(function(ctx) -- question
-- debug(ctx, 'question', ']] .. class_name .. [[')
local saved = ctx:clone()
if ]] .. class_name .. [[:test(ctx:get_charcode()) then
ctx:next_char()
ctx:next_function()
ctx:get_function()(ctx)
end
ctx = saved
ctx:next_function()
return ctx:get_function()(ctx)
end)
]]
end,
capture_start = function(number)
return [[
add(function(ctx)
ctx.modified = true
-- debug(ctx, 'capture_start', ']] .. tostring(number) .. [[')
table.insert(ctx.captures.active, { id = ]] .. tostring(number) .. [[, start = ctx.pos })
ctx:next_function()
return ctx:get_function()(ctx)
end)
]]
end,
capture_finish = function(number)
return [[
add(function(ctx)
ctx.modified = true
-- debug(ctx, 'capture_finish', ']] .. tostring(number) .. [[')
local cap = table.remove(ctx.captures.active)
cap.finish = ctx.pos
local b, e = ctx.offsets[cap.start], ctx.offsets[cap.finish]
if cap.start < 1 then
b = 1
elseif cap.start >= ctx.len then
b = ctx.rawlen + 1
end
if cap.finish < 1 then
e = 1
elseif cap.finish >= ctx.len then
e = ctx.rawlen + 1
end
ctx.captures[cap.id] = rawsub(ctx.str, b, e - 1)
-- debug('capture#' .. tostring(cap.id), '[' .. tostring(cap.start).. ',' .. tostring(cap.finish) .. ']' , 'is', ctx.captures[cap.id])
ctx:next_function()
return ctx:get_function()(ctx)
end)
]]
end,
capture_position = function(number)
return [[
add(function(ctx)
ctx.modified = true
-- debug(ctx, 'capture_position', ']] .. tostring(number) .. [[')
ctx.captures[ ]] .. tostring(number) .. [[ ] = ctx.pos
ctx:next_function()
return ctx:get_function()(ctx)
end)
]]
end,
capture = function(number)
return [[
add(function(ctx)
-- debug(ctx, 'capture', ']] .. tostring(number) .. [[')
local cap = ctx.captures[ ]] .. tostring(number) .. [[ ]
local len = utf8len(cap)
local check = utf8sub(ctx.str, ctx.pos, ctx.pos + len - 1)
-- debug("capture check:", cap, check)
if cap == check then
ctx.pos = ctx.pos + len
ctx:next_function()
return ctx:get_function()(ctx)
end
end)
]]
end,
balancer = function(pair, name)
local class_name = 'class' .. name
return [[
add(function(ctx) -- balancer
local d, b = ]] .. tostring(utf8unicode(pair[1])) .. [[, ]] .. tostring(utf8unicode(pair[2])) .. [[
if ctx:get_charcode() ~= d then return end
local balance = 0
repeat
local c = ctx:get_charcode()
if c == nil then return end
if c == d then
balance = balance + 1
elseif c == b then
balance = balance - 1
end
-- debug("balancer: balance=", balance, ", d=", d, ", b=", b, ", charcode=", ctx:get_charcode())
ctx:next_char()
until balance == 0 or (balance == 2 and d == b)
ctx:next_function()
return ctx:get_function()(ctx)
end)
]]
end,
simple = utf8:require("modifier.compiletime.simple").simple,
}
local next = utf8.util.next
local function parse(regex, c, bs, ctx)
local functions, nbs = nil, bs
if c == '%' then
c, nbs = next(regex, bs)
utf8.debug("next", c, bs)
if c == '' then
error("malformed pattern (ends with '%')")
end
if utf8.raw.find('123456789', c, 1, true) then
functions = { matchers.capture(tonumber(c)) }
nbs = utf8.next(regex, nbs)
elseif c == 'b' then
local d, b
d, nbs = next(regex, nbs)
b, nbs = next(regex, nbs)
assert(d ~= '' and b ~= '', "unbalanced pattern")
functions = { matchers.balancer({d, b}, tostring(bs)) }
nbs = utf8.next(regex, nbs)
end
if functions and ctx.prev_class then
table.insert(functions, 1, matchers.simple(ctx.prev_class, tostring(bs)))
end
elseif c == '*' and ctx.prev_class then
functions = {
matchers.star(
ctx.prev_class,
tostring(bs)
)
}
nbs = bs + 1
elseif c == '+' and ctx.prev_class then
functions = {
matchers.simple(
ctx.prev_class,
tostring(bs)
),
matchers.star(
ctx.prev_class,
tostring(bs)
)
}
nbs = bs + 1
elseif c == '-' and ctx.prev_class then
functions = {
matchers.minus(
ctx.prev_class,
tostring(bs)
)
}
nbs = bs + 1
elseif c == '?' and ctx.prev_class then
functions = {
matchers.question(
ctx.prev_class,
tostring(bs)
)
}
nbs = bs + 1
elseif c == '(' then
ctx.capture = ctx.capture or {balance = 0, id = 0}
ctx.capture.id = ctx.capture.id + 1
local nc = next(regex, nbs)
if nc == ')' then
functions = {matchers.capture_position(ctx.capture.id)}
nbs = bs + 2
else
ctx.capture.balance = ctx.capture.balance + 1
functions = {matchers.capture_start(ctx.capture.id)}
nbs = bs + 1
end
if ctx.prev_class then
table.insert(functions, 1, matchers.simple(ctx.prev_class, tostring(bs)))
end
elseif c == ')' then
ctx.capture = ctx.capture or {balance = 0, id = 0}
functions = { matchers.capture_finish(ctx.capture.id) }
ctx.capture.balance = ctx.capture.balance - 1
assert(ctx.capture.balance >= 0, 'invalid capture: "(" missing')
if ctx.prev_class then
table.insert(functions, 1, matchers.simple(ctx.prev_class, tostring(bs)))
end
nbs = bs + 1
end
return functions, nbs - bs
end
local function check(ctx)
if ctx.capture then assert(ctx.capture.balance == 0, 'invalid capture: ")" missing') end
end
return {
parse = parse,
check = check,
}
end