2020-05-11 16:23:16 +00:00
|
|
|
return function(utf8)
|
|
|
|
|
|
|
|
-- Used below to turn the two `%b` delimiter characters into numbers that are
-- pasted into generated code (presumably their code points -- confirm against utf8.byte).
local utf8unicode = utf8.byte
-- NOTE(review): `sub` is not referenced in the visible part of this file -- confirm before removing.
local sub = utf8.raw.sub
|
|
|
|
|
|
|
|
local matchers = {
|
|
|
|
-- Generates the source text of a matcher for `class*` (greedy, zero or more).
-- @param class  source text of an expression; it is pasted verbatim after
--               `local class<name> =` in the generated chunk
-- @param name   suffix that makes the generated local `class<name>` unique
-- @return string of Lua source. `add` and the ctx methods used inside are not
--         defined in this file; they must be in scope where the chunk is compiled.
-- Generated matcher: a clone of ctx consumes characters for as long as the
-- class tests true, then the following matcher is run from the longest
-- position down to ctx.pos. The clone is re-created from ctx whenever a run
-- left `modified` set on it.
star = function(class, name)
  local class_name = 'class' .. name
  return [[
    local ]] .. class_name .. [[ = ]] .. class .. [[

    add(function(ctx) -- star
      -- debug(ctx, 'star', ']] .. class_name .. [[')
      local clone = ctx:clone()
      while ]] .. class_name .. [[:test(clone:get_charcode()) do
        clone:next_char()
      end
      local pos = clone.pos
      while pos >= ctx.pos do
        clone.pos = pos
        clone.func_pos = ctx.func_pos
        clone:next_function()
        clone:get_function()(clone)
        if clone.modified then
          clone = ctx:clone()
        end
        pos = pos - 1
      end
    end)
]]
end,
|
|
|
|
-- Generates the source text of a matcher for `class-` (lazy, zero or more).
-- @param class  source text of an expression; it is pasted verbatim after
--               `local class<name> =` in the generated chunk
-- @param name   suffix that makes the generated local `class<name>` unique
-- @return string of Lua source. `add` and the ctx methods used inside are not
--         defined in this file; they must be in scope where the chunk is compiled.
-- Generated matcher: on each iteration the following matcher is run first at
-- the current position. The clone is then rewound to that position (and
-- re-created from ctx if the run set `modified`), the class is tested on the
-- current character and the clone advances by one. The loop stops at the
-- first character the class rejects.
minus = function(class, name)
  local class_name = 'class' .. name
  return [[
    local ]] .. class_name .. [[ = ]] .. class .. [[

    add(function(ctx) -- minus
      -- debug(ctx, 'minus', ']] .. class_name .. [[')

      local clone = ctx:clone()
      local pos
      repeat
        pos = clone.pos
        clone:next_function()
        clone:get_function()(clone)
        if clone.modified then
          clone = ctx:clone()
          clone.pos = pos
        else
          clone.pos = pos
          clone.func_pos = ctx.func_pos
        end
        local match = ]] .. class_name .. [[:test(clone:get_charcode())
        clone:next_char()
      until not match
    end)
]]
end,
|
|
|
|
-- Generates the source text of a matcher for `class?` (zero or one).
-- @param class  source text of an expression; it is pasted verbatim after
--               `local class<name> =` in the generated chunk
-- @param name   suffix that makes the generated local `class<name>` unique
-- @return string of Lua source. `add` and the ctx methods used inside are not
--         defined in this file; they must be in scope where the chunk is compiled.
-- Generated matcher: a clone of ctx is saved first. If the class accepts the
-- current character, ctx consumes it and the following matcher is run. If
-- that call returns, matching restarts from the saved clone and the following
-- matcher is run without consuming anything.
question = function(class, name)
  local class_name = 'class' .. name
  return [[
    local ]] .. class_name .. [[ = ]] .. class .. [[

    add(function(ctx) -- question
      -- debug(ctx, 'question', ']] .. class_name .. [[')
      local saved = ctx:clone()
      if ]] .. class_name .. [[:test(ctx:get_charcode()) then
        ctx:next_char()
        ctx:next_function()
        ctx:get_function()(ctx)
      end
      ctx = saved
      ctx:next_function()
      return ctx:get_function()(ctx)
    end)
]]
end,
|
|
|
|
-- Generates the source text of the matcher emitted for an opening `(`.
-- @param number  capture id; embedded into the generated code via tostring
-- @return string of Lua source (relies on `add` being in scope where it is compiled)
-- Generated matcher: sets ctx.modified, pushes `{ id = <number>, start = ctx.pos }`
-- onto ctx.captures.active, then hands over to the following matcher.
capture_start = function(number)
  return [[
    add(function(ctx)
      ctx.modified = true
      -- debug(ctx, 'capture_start', ']] .. tostring(number) .. [[')
      table.insert(ctx.captures.active, { id = ]] .. tostring(number) .. [[, start = ctx.pos })
      ctx:next_function()
      return ctx:get_function()(ctx)
    end)
]]
end,
|
|
|
|
-- Generates the source text of the matcher emitted for a closing `)`.
-- @param number  only appears in the commented-out debug line of the template;
--                the capture actually closed is the one popped from
--                ctx.captures.active at match time
-- @return string of Lua source (relies on `add` and `rawsub` being in scope
--         where it is compiled)
-- Generated matcher: sets ctx.modified, pops the most recent active capture
-- and records ctx.pos as its finish. Start and finish are translated through
-- ctx.offsets, with two overrides: a position below 1 maps to 1, and a
-- position >= ctx.len maps to ctx.rawlen + 1. The text
-- rawsub(ctx.str, b, e - 1) is stored in ctx.captures[cap.id] before the
-- following matcher runs.
capture_finish = function(number)
  return [[
    add(function(ctx)
      ctx.modified = true
      -- debug(ctx, 'capture_finish', ']] .. tostring(number) .. [[')
      local cap = table.remove(ctx.captures.active)
      cap.finish = ctx.pos
      local b, e = ctx.offsets[cap.start], ctx.offsets[cap.finish]
      if cap.start < 1 then
        b = 1
      elseif cap.start >= ctx.len then
        b = ctx.rawlen + 1
      end
      if cap.finish < 1 then
        e = 1
      elseif cap.finish >= ctx.len then
        e = ctx.rawlen + 1
      end
      ctx.captures[cap.id] = rawsub(ctx.str, b, e - 1)
      -- debug('capture#' .. tostring(cap.id), '[' .. tostring(cap.start).. ',' .. tostring(cap.finish) .. ']' , 'is', ctx.captures[cap.id])
      ctx:next_function()
      return ctx:get_function()(ctx)
    end)
]]
end,
|
|
|
|
-- Generates the source text of the matcher emitted for an empty capture `()`.
-- @param number  capture id; embedded into the generated code via tostring
-- @return string of Lua source (relies on `add` being in scope where it is compiled)
-- Generated matcher: sets ctx.modified, stores the current ctx.pos (a number,
-- not a substring) in ctx.captures[<number>], then runs the following matcher.
capture_position = function(number)
  return [[
    add(function(ctx)
      ctx.modified = true
      -- debug(ctx, 'capture_position', ']] .. tostring(number) .. [[')
      ctx.captures[ ]] .. tostring(number) .. [[ ] = ctx.pos
      ctx:next_function()
      return ctx:get_function()(ctx)
    end)
]]
end,
|
|
|
|
-- Generates the source text of the matcher for a back-reference (`%1` .. `%9`).
-- @param number  id of the capture being referenced; embedded via tostring
-- @return string of Lua source (relies on `add`, `utf8len` and `utf8sub` being
--         in scope where it is compiled)
-- Generated matcher: reads ctx.captures[<number>], measures it with utf8len and
-- compares it with the same number of characters of ctx.str starting at
-- ctx.pos. On equality ctx.pos advances by that length and the following
-- matcher runs; otherwise the function returns nothing.
-- NOTE(review): the captured value is passed to utf8len unchecked; what happens
-- for an unset capture or a position capture (a number) depends on utf8len -- confirm.
capture = function(number)
  return [[
    add(function(ctx)
      -- debug(ctx, 'capture', ']] .. tostring(number) .. [[')
      local cap = ctx.captures[ ]] .. tostring(number) .. [[ ]
      local len = utf8len(cap)
      local check = utf8sub(ctx.str, ctx.pos, ctx.pos + len - 1)
      -- debug("capture check:", cap, check)
      if cap == check then
        ctx.pos = ctx.pos + len
        ctx:next_function()
        return ctx:get_function()(ctx)
      end
    end)
]]
end,
|
|
|
|
-- Generates the source text of the matcher for a balanced item `%bxy`.
-- @param pair  two-element array { opening_char, closing_char }; both are
--              converted with utf8unicode and embedded as numbers
-- @param name  accepted for signature parity with the other matchers; this
--              template declares no named local, so it is not used
-- @return string of Lua source (relies on `add` being in scope where it is compiled)
-- Generated matcher: returns without matching unless the current character is
-- the opening one. Otherwise it walks forward, adding 1 for every opening and
-- subtracting 1 for every closing character, until the balance returns to 0.
-- When both delimiters are the same character every occurrence counts as an
-- opening, so the loop stops at the second occurrence (balance == 2). Hitting
-- the end of input (nil char code) aborts without a match.
balancer = function(pair, name)
  local open_code = tostring(utf8unicode(pair[1]))
  local close_code = tostring(utf8unicode(pair[2]))
  -- A long bracket drops a newline that directly follows `[[`, so the blank
  -- line after the `local d, b = ...` line is what keeps the pasted number
  -- and the next statement on separate lines of the generated code.
  return [[

    add(function(ctx) -- balancer
      local d, b = ]] .. open_code .. [[, ]] .. close_code .. [[

      if ctx:get_charcode() ~= d then return end
      local balance = 0
      repeat
        local c = ctx:get_charcode()
        if c == nil then return end

        if c == d then
          balance = balance + 1
        elseif c == b then
          balance = balance - 1
        end
        -- debug("balancer: balance=", balance, ", d=", d, ", b=", b, ", charcode=", ctx:get_charcode())
        ctx:next_char()
      until balance == 0 or (balance == 2 and d == b)
      ctx:next_function()
      return ctx:get_function()(ctx)
    end)
]]
end,
|
|
|
|
-- Generator for a single, unquantified class match; implemented in the
-- sibling module "modifier.compiletime.simple" and re-exported here so that
-- parse can emit a pending class before the matchers defined above.
simple = utf8:require("modifier.compiletime.simple").simple,
|
|
|
|
}
|
|
|
|
|
|
|
|
-- Shadows the global `next` for the rest of this file. As used by parse,
-- next(str, offset) returns the character following `offset` together with
-- that character's offset; parse treats an empty string as "end of pattern".
local next = utf8.util.next
|
|
|
|
|
|
|
|
-- Tries to compile the pattern item that starts at offset `bs`.
-- Handles `%1`..`%9`, `%bxy`, the quantifiers `*`, `+`, `-`, `?` (only when a
-- class is pending in ctx.prev_class) and the capture brackets `(`, `)`.
-- @param regex  the pattern being compiled
-- @param c      the character found at `bs`
-- @param bs     offset of `c` inside `regex`
-- @param ctx    compile-time state: ctx.prev_class is read; ctx.capture
--               ({ balance, id }) is created and updated for `(` and `)`
-- @return list of generated matcher chunks, or nil when the item is not
--         handled here
-- @return how far the caller has to advance (new offset minus `bs`)
-- Raises on a pattern ending in `%`, on `%b` without two delimiters and on a
-- `)` that has no matching `(`.
local function parse(regex, c, bs, ctx)
  local functions
  local nbs = bs
  local prev_class = ctx.prev_class
  -- `%`-items and capture brackets flush a pending class in front of their
  -- own matcher; quantifiers consume the pending class themselves.
  local flush_prev_class = false

  if c == '%' then
    local escaped
    escaped, nbs = next(regex, bs)
    utf8.debug("next", escaped, bs)
    if escaped == '' then
      error("malformed pattern (ends with '%')")
    end

    if utf8.raw.find('123456789', escaped, 1, true) then
      -- back-reference to an earlier capture
      functions = { matchers.capture(tonumber(escaped)) }
      nbs = utf8.next(regex, nbs)
    elseif escaped == 'b' then
      local opening, closing
      opening, nbs = next(regex, nbs)
      closing, nbs = next(regex, nbs)
      assert(opening ~= '' and closing ~= '', "unbalanced pattern")
      functions = { matchers.balancer({opening, closing}, tostring(bs)) }
      nbs = utf8.next(regex, nbs)
    end
    flush_prev_class = true

  elseif prev_class and (c == '*' or c == '+' or c == '-' or c == '?') then
    local id = tostring(bs)
    if c == '*' then
      functions = { matchers.star(prev_class, id) }
    elseif c == '+' then
      -- one mandatory match followed by a greedy repetition
      functions = { matchers.simple(prev_class, id), matchers.star(prev_class, id) }
    elseif c == '-' then
      functions = { matchers.minus(prev_class, id) }
    else
      functions = { matchers.question(prev_class, id) }
    end
    nbs = bs + 1

  elseif c == '(' then
    ctx.capture = ctx.capture or {balance = 0, id = 0}
    local capture = ctx.capture
    capture.id = capture.id + 1

    if next(regex, bs) == ')' then
      -- `()` is a position capture and never stays open
      functions = { matchers.capture_position(capture.id) }
      nbs = bs + 2
    else
      capture.balance = capture.balance + 1
      functions = { matchers.capture_start(capture.id) }
      nbs = bs + 1
    end
    flush_prev_class = true

  elseif c == ')' then
    ctx.capture = ctx.capture or {balance = 0, id = 0}
    local capture = ctx.capture
    functions = { matchers.capture_finish(capture.id) }

    capture.balance = capture.balance - 1
    assert(capture.balance >= 0, 'invalid capture: "(" missing')

    nbs = bs + 1
    flush_prev_class = true
  end

  if flush_prev_class and functions and prev_class then
    table.insert(functions, 1, matchers.simple(prev_class, tostring(bs)))
  end

  return functions, nbs - bs
end
|
|
|
|
|
|
|
|
-- Final validation once the whole pattern has been parsed: every capture that
-- was opened must have been closed.
-- @param ctx  compile-time state; only ctx.capture.balance is inspected
-- Raises 'invalid capture: ")" missing' when a capture is still open.
local function check(ctx)
  local capture = ctx.capture
  if not capture then
    -- the pattern contained no capture brackets at all
    return
  end
  assert(capture.balance == 0, 'invalid capture: ")" missing')
end
|
|
|
|
|
|
|
|
-- Interface returned to the caller of this module's factory function:
--   parse -- compiles one pattern item into matcher chunks
--   check -- verifies that no capture was left open
return {
  parse = parse,
  check = check,
}
|
|
|
|
|
|
|
|
end
|