2020-05-11 16:23:16 +00:00
|
|
|
return function(utf8)
|
|
|
|
|
2020-08-04 10:28:04 +00:00
|
|
|
local utf8unicode = utf8.unicode
|
2020-05-11 16:23:16 +00:00
|
|
|
local utf8sub = utf8.sub
|
2020-08-04 10:28:04 +00:00
|
|
|
local sub = utf8.raw.sub
|
|
|
|
local byte = utf8.raw.byte
|
2020-05-11 16:23:16 +00:00
|
|
|
local utf8len = utf8.len
|
2020-08-04 10:28:04 +00:00
|
|
|
local utf8next = utf8.next
|
2020-05-11 16:23:16 +00:00
|
|
|
local rawgsub = utf8.raw.gsub
|
2020-08-04 10:28:04 +00:00
|
|
|
local utf8offset = utf8.offset
|
|
|
|
local utf8char = utf8.char
|
2020-05-11 16:23:16 +00:00
|
|
|
|
|
|
|
local util = utf8.util
|
|
|
|
|
|
|
|
-- Prototype table: every method defined on `ctx` is reachable from a context
-- instance through the metatable below.
local ctx = {}

-- Metatable attached to each context instance created by ctx.new.
local mt = {}

-- Missing fields on an instance are looked up on the prototype.
mt.__index = ctx

--- Render a one-line description of the context.
-- The template placeholders `${name}` are substituted from a lookup table
-- keyed by the captured name (rawgsub is utf8.raw.gsub; presumably the
-- byte-oriented string.gsub -- confirm against the primitives module).
-- @return the formatted string produced by rawgsub
function mt.__tostring(self)
  local template = [[str: '${str}', char: ${pos} '${char}', func: ${func_pos}]]
  local values = {
    str = self.str,
    pos = self.pos,
    char = self:get_char(),
    func_pos = self.func_pos,
  }
  return rawgsub(template, "${(.-)}", values)
end
|
|
|
|
|
|
|
|
--- Create a context instance, optionally seeded from an existing one.
-- Scalar fields and the `bytes`/`offsets`/`functions` references are taken
-- from `obj` as-is; `result` and `captures` are duplicated with util.copy so
-- the new context does not share them with `obj`.
--
-- When `obj.bytes` is absent the string is decoded once up front:
--   * `bytes[i]`   receives the value of utf8unicode for the i-th character
--                  (presumably its code point -- confirm in primitives),
--   * `offsets[i]` receives the byte index at which that character starts,
--   * `len`        is set to the index one past the last filled slot
--                  (i.e. number of decoded characters + 1),
--   * `rawlen`     is set to the byte length of the string.
--
-- @param obj table with at least a `str` field (a fresh `{}` is used when
--            nil, which then fails the `str` assertion)
-- @return the new context table, with `mt` as its metatable
-- @raise "str is required" when `obj.str` is nil or false
function ctx.new(obj)
  obj = obj or {}

  local res = setmetatable({
    pos = obj.pos or 1,
    byte_pos = obj.pos or 1,
    str = assert(obj.str, "str is required"),
    len = obj.len,
    rawlen = obj.rawlen,
    bytes = obj.bytes,
    offsets = obj.offsets,
    starts = obj.starts or nil,
    functions = obj.functions or {},
    func_pos = obj.func_pos or 1,
    ends = obj.ends or nil,
    result = obj.result and util.copy(obj.result) or {},
    captures = obj.captures and util.copy(obj.captures, true) or {active = {}},
    modified = false,
  }, mt)

  if not res.bytes then
    local str = res.str
    local rawlen = #str
    -- Both arrays are sized by the byte length, an upper bound on the
    -- number of characters.
    local new_array = utf8.config.int32array
    local bytes = new_array(rawlen)
    local offsets = new_array(rawlen)

    local byte_index, char_index = 1, 1
    while byte_index <= rawlen do
      bytes[char_index] = utf8unicode(str, byte_index, byte_index)
      offsets[char_index] = byte_index
      -- Use the file-level alias, matching how utf8unicode is called above.
      byte_index = utf8next(str, byte_index)
      char_index = char_index + 1
    end

    res.bytes = bytes
    res.offsets = offsets
    res.byte_pos = res.pos
    -- char_index is one past the last stored entry; get_char/get_charcode
    -- treat `pos >= len` as "past the end", so keep this convention.
    res.len = char_index
    res.rawlen = rawlen
  end

  return res
end
|
|
|
|
|
|
|
|
--- Produce a new context seeded from this one.
-- The receiver is handed to `new` as the source object.
-- @return whatever `new` returns for this context
function ctx:clone()
  local duplicate = self.new(self)
  return duplicate
end
|
|
|
|
|
|
|
|
--- Advance the cursor by one position.
-- `byte_pos` is kept equal to `pos`.
function ctx:next_char()
  local advanced = self.pos + 1
  self.pos = advanced
  self.byte_pos = advanced
end
|
|
|
|
|
|
|
|
--- Move the cursor back by one position.
-- `byte_pos` is kept equal to `pos`.
function ctx:prev_char()
  local retreated = self.pos - 1
  self.pos = retreated
  self.byte_pos = retreated
end
|
|
|
|
|
|
|
|
--- Return the character under the cursor as a string.
-- @return "" when `pos` is at or beyond `len`; otherwise the result of
--         utf8char applied to the stored value at `pos`
function ctx:get_char()
  local index = self.pos
  if index >= self.len then
    return ""
  end
  local code = self.bytes[index]
  return utf8char(code)
end
|
|
|
|
|
|
|
|
--- Return the stored numeric value for the character under the cursor.
-- @return nil when `pos` is at or beyond `len`; otherwise `bytes[pos]`
function ctx:get_charcode()
  local index = self.pos
  if index >= self.len then
    return nil
  end
  return self.bytes[index]
end
|
|
|
|
|
|
|
|
--- Step the function cursor forward by one.
function ctx:next_function()
  local following = self.func_pos + 1
  self.func_pos = following
end
|
|
|
|
|
|
|
|
--- Fetch the entry of `functions` selected by the function cursor.
-- @return `functions[func_pos]` (nil when no entry exists at that index)
function ctx:get_function()
  local list = self.functions
  return list[self.func_pos]
end
|
|
|
|
|
|
|
|
--- Report completion to whoever resumed the running coroutine.
-- Emits a 'done' debug message, then yields the context together with its
-- `result` and `captures` tables. Values passed back on resume are discarded.
function ctx:done()
  utf8.debug('done', self)
  local result, captures = self.result, self.captures
  coroutine.yield(self, result, captures)
end
|
|
|
|
|
|
|
|
--- Yield a single nil to whoever resumed the running coroutine.
-- Emits a 'terminate' debug message first. Values passed back on resume are
-- discarded.
function ctx:terminate()
  utf8.debug('terminate', self)
  local yield = coroutine.yield
  yield(nil)
end
|
|
|
|
|
|
|
|
return ctx
|
|
|
|
|
|
|
|
end
|