mirror of
https://github.com/linux-man/LoveFrames.git
synced 2024-11-18 16:04:22 +00:00
utf8 lib update
This commit is contained in:
parent
36e3d5874c
commit
72886439e7
@ -7,7 +7,7 @@ utf8.config.begins = utf8.config.begins or {
|
||||
function utf8.regex.compiletime.begins.parse(regex, c, bs, ctx)
|
||||
for _, m in ipairs(utf8.config.begins) do
|
||||
local functions, move = m.parse(regex, c, bs, ctx)
|
||||
utf8.debug("begins", _, c, bs, nbs, move, functions)
|
||||
utf8.debug("begins", _, c, bs, move, functions)
|
||||
if functions then
|
||||
return functions, move
|
||||
end
|
||||
|
@ -4,17 +4,14 @@ local matchers = {
|
||||
sliding = function()
|
||||
return [[
|
||||
add(function(ctx) -- sliding
|
||||
local saved = ctx:clone()
|
||||
local start_pos = ctx.pos
|
||||
while ctx.pos <= 1 + utf8len(ctx.str) do
|
||||
debug('starting from', ctx, "start_pos", start_pos)
|
||||
ctx.result.start = ctx.pos
|
||||
ctx:next_function()
|
||||
ctx:get_function()(ctx)
|
||||
while ctx.pos <= ctx.len do
|
||||
local clone = ctx:clone()
|
||||
-- debug('starting from', clone, "start_pos", clone.pos)
|
||||
clone.result.start = clone.pos
|
||||
clone:next_function()
|
||||
clone:get_function()(clone)
|
||||
|
||||
ctx = saved:clone()
|
||||
start_pos = start_pos + 1
|
||||
ctx.pos = start_pos
|
||||
ctx:next_char()
|
||||
end
|
||||
ctx:terminate()
|
||||
end)
|
||||
@ -23,7 +20,9 @@ local matchers = {
|
||||
fromstart = function(ctx)
|
||||
return [[
|
||||
add(function(ctx) -- fromstart
|
||||
local saved = ctx:clone()
|
||||
if ctx.byte_pos > ctx.len then
|
||||
return
|
||||
end
|
||||
ctx.result.start = ctx.pos
|
||||
ctx:next_function()
|
||||
ctx:get_function()(ctx)
|
||||
|
@ -89,6 +89,9 @@ function builder:include(b)
|
||||
end
|
||||
|
||||
function builder:build()
|
||||
if self.codes and #self.codes == 1 and not self.inverted and not self.ranges and not self.classes and not self.not_classes and not self.includes then
|
||||
return "{test = function(self, cc) return cc == " .. self.codes[1] .. " end}"
|
||||
else
|
||||
local codes_list = table.concat(self.codes or {}, ', ')
|
||||
local ranges_list = ''
|
||||
for i, r in ipairs(self.ranges or {}) do ranges_list = ranges_list .. (i > 1 and ', {' or '{') .. tostring(r[1]) .. ', ' .. tostring(r[2]) .. '}' end
|
||||
@ -118,6 +121,7 @@ function builder:build()
|
||||
|
||||
return src
|
||||
end
|
||||
end
|
||||
|
||||
return builder
|
||||
|
||||
|
@ -28,7 +28,7 @@ return function(str, c, bs, ctx)
|
||||
if c == '%' then
|
||||
c, nbs = next(str, nbs)
|
||||
r2 = c
|
||||
elseif c ~= '' then
|
||||
elseif c ~= '' and c ~= ']' then
|
||||
r2 = c
|
||||
end
|
||||
|
||||
|
@ -16,6 +16,9 @@ local function parse(str, c, bs, ctx)
|
||||
|
||||
if c == '%' then
|
||||
c, nbs = next(str, bs)
|
||||
if c == '' then
|
||||
error("malformed pattern (ends with '%')")
|
||||
end
|
||||
local _c = utf8.raw.lower(c)
|
||||
local matched
|
||||
if _c == 'a' then
|
||||
@ -46,8 +49,15 @@ local function parse(str, c, bs, ctx)
|
||||
else
|
||||
class = cl.new():with_classes(matched)
|
||||
end
|
||||
elseif _c == 'z' then
|
||||
class = cl.new():with_codes(0)
|
||||
if _c ~= c then
|
||||
class = class:invert()
|
||||
end
|
||||
elseif c == '[' then
|
||||
else
|
||||
class = cl.new():with_codes(c)
|
||||
end
|
||||
elseif c == '[' and not ctx.internal then
|
||||
local old_internal = ctx.internal
|
||||
ctx.internal = true
|
||||
class = cl.new()
|
||||
@ -58,9 +68,18 @@ local function parse(str, c, bs, ctx)
|
||||
utf8.debug("next", tttt, c, nbs)
|
||||
if c == '^' and firstletter then
|
||||
class:invert()
|
||||
local nc, nnbs = next(str, nbs)
|
||||
if nc == ']' then
|
||||
class:with_codes(nc)
|
||||
nbs = nnbs
|
||||
end
|
||||
elseif c == ']' then
|
||||
if firstletter then
|
||||
class:with_codes(c)
|
||||
else
|
||||
utf8.debug('] on pos', tttt, nbs)
|
||||
break
|
||||
end
|
||||
elseif c == '' then
|
||||
error "malformed pattern (missing ']')"
|
||||
else
|
||||
|
@ -71,7 +71,7 @@ function class:with_subs(...)
|
||||
end
|
||||
|
||||
function class:in_codes(item)
|
||||
if not self.codes then return false end
|
||||
if not self.codes or #self.codes == 0 then return nil end
|
||||
|
||||
local head, tail = 1, #self.codes
|
||||
local mid = math.floor((head + tail)/2)
|
||||
@ -93,7 +93,7 @@ function class:in_codes(item)
|
||||
end
|
||||
|
||||
function class:in_ranges(char_code)
|
||||
if not self.ranges then return false end
|
||||
if not self.ranges or #self.ranges == 0 then return nil end
|
||||
|
||||
for _,r in ipairs(self.ranges) do
|
||||
if r[1] <= char_code and char_code <= r[2] then
|
||||
@ -104,7 +104,7 @@ function class:in_ranges(char_code)
|
||||
end
|
||||
|
||||
function class:in_classes(char_code)
|
||||
if not self.classes then return false end
|
||||
if not self.classes or #self.classes == 0 then return nil end
|
||||
|
||||
for _, class in ipairs(self.classes) do
|
||||
if self:is(class, char_code) then
|
||||
@ -115,7 +115,7 @@ function class:in_classes(char_code)
|
||||
end
|
||||
|
||||
function class:in_not_classes(char_code)
|
||||
if not self.not_classes then return false end
|
||||
if not self.not_classes or #self.not_classes == 0 then return nil end
|
||||
|
||||
for _, class in ipairs(self.not_classes) do
|
||||
if self:is(class, char_code) then
|
||||
@ -130,7 +130,7 @@ function class:is(class, char_code)
|
||||
end
|
||||
|
||||
function class:in_subs(char_code)
|
||||
if not self.subs or #self.subs == 0 then return false end
|
||||
if not self.subs or #self.subs == 0 then return nil end
|
||||
|
||||
for _, c in ipairs(self.subs) do
|
||||
if not c:test(char_code) then
|
||||
@ -142,20 +142,40 @@ end
|
||||
|
||||
function class:test(char_code)
|
||||
local result = self:do_test(char_code)
|
||||
utf8.debug('class:test', result, "'" .. (char_code and utf8.char(char_code) or 'nil') .. "'", char_code)
|
||||
-- utf8.debug('class:test', result, "'" .. (char_code and utf8.char(char_code) or 'nil') .. "'", char_code)
|
||||
return result
|
||||
end
|
||||
|
||||
function class:do_test(char_code)
|
||||
if not char_code then return false end
|
||||
local found = (self:in_codes(char_code) or self:in_ranges(char_code) or self:in_classes(char_code) or self:in_subs(char_code)) and not self:in_not_classes(char_code)
|
||||
utf8.debug('class:do_test', 'found', found, 'inverted', self.inverted, 'result', self.inverted and not found or found)
|
||||
-- utf8.debug(self:in_codes(char_code), self:in_ranges(char_code), self:in_classes(char_code), self:in_subs(char_code), not self:in_not_classes(char_code))
|
||||
-- ternary if ideom (self.inverted and not found or found) doesn't work with booleans >_<
|
||||
if self.inverted then
|
||||
return not found
|
||||
local in_not_classes = self:in_not_classes(char_code)
|
||||
if in_not_classes then
|
||||
return not not self.inverted
|
||||
end
|
||||
local in_codes = self:in_codes(char_code)
|
||||
if in_codes then
|
||||
return not self.inverted
|
||||
end
|
||||
local in_ranges = self:in_ranges(char_code)
|
||||
if in_ranges then
|
||||
return not self.inverted
|
||||
end
|
||||
local in_classes = self:in_classes(char_code)
|
||||
if in_classes then
|
||||
return not self.inverted
|
||||
end
|
||||
local in_subs = self:in_subs(char_code)
|
||||
if in_subs then
|
||||
return not self.inverted
|
||||
end
|
||||
if (in_codes == nil)
|
||||
and (in_ranges == nil)
|
||||
and (in_classes == nil)
|
||||
and (in_subs == nil)
|
||||
and (in_not_classes == false) then
|
||||
return not self.inverted
|
||||
else
|
||||
return found
|
||||
return not not self.inverted
|
||||
end
|
||||
end
|
||||
|
||||
|
50
loveframes/third-party/utf8/context/runtime.lua
vendored
50
loveframes/third-party/utf8/context/runtime.lua
vendored
@ -1,9 +1,14 @@
|
||||
return function(utf8)
|
||||
|
||||
local utf8unicode = utf8.byte
|
||||
local utf8unicode = utf8.unicode
|
||||
local utf8sub = utf8.sub
|
||||
local sub = utf8.raw.sub
|
||||
local byte = utf8.raw.byte
|
||||
local utf8len = utf8.len
|
||||
local utf8next = utf8.next
|
||||
local rawgsub = utf8.raw.gsub
|
||||
local utf8offset = utf8.offset
|
||||
local utf8char = utf8.char
|
||||
|
||||
local util = utf8.util
|
||||
|
||||
@ -22,16 +27,42 @@ local mt = {
|
||||
|
||||
function ctx.new(obj)
|
||||
obj = obj or {}
|
||||
return setmetatable({
|
||||
local res = setmetatable({
|
||||
pos = obj.pos or 1,
|
||||
str = obj.str or nil,
|
||||
byte_pos = obj.pos or 1,
|
||||
str = assert(obj.str, "str is required"),
|
||||
len = obj.len,
|
||||
rawlen = obj.rawlen,
|
||||
bytes = obj.bytes,
|
||||
offsets = obj.offsets,
|
||||
starts = obj.starts or nil,
|
||||
functions = obj.functions or {},
|
||||
func_pos = obj.func_pos or 1,
|
||||
ends = obj.ends or nil,
|
||||
result = obj.result and util.copy(obj.result) or {},
|
||||
captures = obj.captures and util.copy(obj.captures, true) or {active = {}},
|
||||
modified = false,
|
||||
}, mt)
|
||||
if not res.bytes then
|
||||
local str = res.str
|
||||
local l = #str
|
||||
local bytes = utf8.config.int32array(l)
|
||||
local offsets = utf8.config.int32array(l)
|
||||
local c, bs, i = nil, 1, 1
|
||||
while bs <= l do
|
||||
bytes[i] = utf8unicode(str, bs, bs)
|
||||
offsets[i] = bs
|
||||
bs = utf8.next(str, bs)
|
||||
i = i + 1
|
||||
end
|
||||
res.bytes = bytes
|
||||
res.offsets = offsets
|
||||
res.byte_pos = res.pos
|
||||
res.len = i
|
||||
res.rawlen = l
|
||||
end
|
||||
|
||||
return res
|
||||
end
|
||||
|
||||
function ctx:clone()
|
||||
@ -40,15 +71,22 @@ end
|
||||
|
||||
function ctx:next_char()
|
||||
self.pos = self.pos + 1
|
||||
self.byte_pos = self.pos
|
||||
end
|
||||
|
||||
function ctx:prev_char()
|
||||
self.pos = self.pos - 1
|
||||
self.byte_pos = self.pos
|
||||
end
|
||||
|
||||
function ctx:get_char()
|
||||
return utf8sub(self.str, self.pos, self.pos)
|
||||
if self.len <= self.pos then return "" end
|
||||
return utf8char(self.bytes[self.pos])
|
||||
end
|
||||
|
||||
function ctx:get_charcode()
|
||||
if utf8len(self.str) < self.pos then return nil end
|
||||
return utf8unicode(self:get_char())
|
||||
if self.len <= self.pos then return nil end
|
||||
return self.bytes[self.pos]
|
||||
end
|
||||
|
||||
function ctx:next_function()
|
||||
|
@ -7,7 +7,7 @@ utf8.config.ends = utf8.config.ends or {
|
||||
function utf8.regex.compiletime.ends.parse(regex, c, bs, ctx)
|
||||
for _, m in ipairs(utf8.config.ends) do
|
||||
local functions, move = m.parse(regex, c, bs, ctx)
|
||||
utf8.debug("ends", _, c, bs, nbs, move, functions)
|
||||
utf8.debug("ends", _, c, bs, move, functions)
|
||||
if functions then
|
||||
return functions, move
|
||||
end
|
||||
|
@ -13,7 +13,8 @@ local matchers = {
|
||||
return [[
|
||||
add(function(ctx) -- toend
|
||||
ctx.result.finish = ctx.pos - 1
|
||||
if ctx.pos == #ctx.str + 1 then ctx:done() end
|
||||
ctx.modified = true
|
||||
if ctx.pos == utf8len(ctx.str) + 1 then ctx:done() end
|
||||
end)
|
||||
]]
|
||||
end,
|
||||
|
43
loveframes/third-party/utf8/functions/lua53.lua
vendored
43
loveframes/third-party/utf8/functions/lua53.lua
vendored
@ -3,7 +3,22 @@ return function(utf8)
|
||||
local utf8sub = utf8.sub
|
||||
local utf8gensub = utf8.gensub
|
||||
local unpack = utf8.config.unpack
|
||||
local get_matcher_function = utf8:require 'regex_parser'
|
||||
local generate_matcher_function = utf8:require 'regex_parser'
|
||||
|
||||
function get_matcher_function(regex, plain)
|
||||
local res
|
||||
if utf8.config.cache then
|
||||
res = utf8.config.cache[plain and "plain" or "regex"][regex]
|
||||
end
|
||||
if res then
|
||||
return res
|
||||
end
|
||||
res = generate_matcher_function(regex, plain)
|
||||
if utf8.config.cache then
|
||||
utf8.config.cache[plain and "plain" or "regex"][regex] = res
|
||||
end
|
||||
return res
|
||||
end
|
||||
|
||||
local function utf8find(str, regex, init, plain)
|
||||
local func = get_matcher_function(regex, plain)
|
||||
@ -19,7 +34,8 @@ local function utf8find(str, regex, init, plain)
|
||||
end
|
||||
|
||||
local function utf8match(str, regex, init)
|
||||
local func = get_matcher_function(regex, plain, utf8)
|
||||
local func = get_matcher_function(regex, false)
|
||||
init = ((init or 1) < 0) and (utf8.len(str) + init + 1) or init
|
||||
local ctx, result, captures = func(str, init, utf8)
|
||||
if not ctx then return nil end
|
||||
|
||||
@ -34,7 +50,7 @@ end
|
||||
|
||||
local function utf8gmatch(str, regex)
|
||||
regex = (utf8sub(regex,1,1) ~= '^') and regex or '%' .. regex
|
||||
local func = get_matcher_function(regex, plain, utf8)
|
||||
local func = get_matcher_function(regex, false)
|
||||
local ctx, result, captures
|
||||
local continue_pos = 1
|
||||
|
||||
@ -71,7 +87,7 @@ local function replace(repl, args)
|
||||
else
|
||||
num = tonumber(c)
|
||||
if num then
|
||||
ret = ret .. args[num]
|
||||
ret = ret .. assert(args[num], "invalid capture index %" .. c)
|
||||
else
|
||||
ret = ret .. c
|
||||
end
|
||||
@ -79,13 +95,9 @@ local function replace(repl, args)
|
||||
end
|
||||
end
|
||||
elseif type(repl) == 'table' then
|
||||
ret = repl[args[1] or args[0]] or ''
|
||||
ret = repl[args[1]] or args[0]
|
||||
elseif type(repl) == 'function' then
|
||||
if #args > 0 then
|
||||
ret = repl(unpack(args, 1)) or ''
|
||||
else
|
||||
ret = repl(args[0]) or ''
|
||||
end
|
||||
ret = repl(unpack(args, 1)) or args[0]
|
||||
end
|
||||
return ret
|
||||
end
|
||||
@ -95,8 +107,7 @@ local function utf8gsub(str, regex, repl, limit)
|
||||
local subbed = ''
|
||||
local prev_sub_finish = 1
|
||||
|
||||
regex = (utf8sub(regex,1,1) ~= '^') and regex or '%' .. regex
|
||||
local func = get_matcher_function(regex, plain, utf8)
|
||||
local func = get_matcher_function(regex, false)
|
||||
local ctx, result, captures
|
||||
local continue_pos = 1
|
||||
|
||||
@ -111,7 +122,13 @@ local function utf8gsub(str, regex, repl, limit)
|
||||
utf8.debug('captures:', captures)
|
||||
|
||||
continue_pos = math.max(result.finish + 1, result.start + 1)
|
||||
local args = {[0] = utf8sub(str, result.start, result.finish), unpack(captures)}
|
||||
local args
|
||||
if #captures > 0 then
|
||||
args = {[0] = utf8sub(str, result.start, result.finish), unpack(captures)}
|
||||
else
|
||||
args = {[0] = utf8sub(str, result.start, result.finish)}
|
||||
args[1] = args[0]
|
||||
end
|
||||
|
||||
subbed = subbed .. utf8sub(str, prev_sub_finish, result.start - 1)
|
||||
subbed = subbed .. replace(repl, args)
|
||||
|
12
loveframes/third-party/utf8/init.lua
vendored
12
loveframes/third-party/utf8/init.lua
vendored
@ -1,5 +1,8 @@
|
||||
local module_path = ...
|
||||
module_path = module_path:match("^(.-)init$") or (module_path .. '.')
|
||||
|
||||
local ffi_enabled, ffi = pcall(require, 'ffi')
|
||||
|
||||
local utf8 = {
|
||||
config = {},
|
||||
default = {
|
||||
@ -15,7 +18,14 @@ local utf8 = {
|
||||
__mode = 'kv'
|
||||
}),
|
||||
},
|
||||
locale = "C.UTF-8",
|
||||
locale = nil,
|
||||
int32array = function(size)
|
||||
if ffi_enabled then
|
||||
return ffi.new("uint32_t[?]", size + 1)
|
||||
else
|
||||
return {}
|
||||
end
|
||||
end
|
||||
},
|
||||
regex = {
|
||||
compiletime = {
|
||||
|
@ -7,12 +7,13 @@ local matchers = {
|
||||
local ]] .. class_name .. [[ = ]] .. class .. [[
|
||||
|
||||
add(function(ctx) -- frontier
|
||||
ctx.pos = ctx.pos - 1
|
||||
local prev_charcode = ctx:get_charcode()
|
||||
ctx:prev_char()
|
||||
local prev_charcode = ctx:get_charcode() or 0
|
||||
ctx:next_char()
|
||||
debug("frontier pos", ctx.pos, "prev_charcode", prev_charcode, "charcode", ctx:get_charcode())
|
||||
local charcode = ctx:get_charcode() or 0
|
||||
-- debug("frontier pos", ctx.pos, "prev_charcode", prev_charcode, "charcode", charcode)
|
||||
if ]] .. class_name .. [[:test(prev_charcode) then return end
|
||||
if ]] .. class_name .. [[:test(ctx:get_charcode()) then
|
||||
if ]] .. class_name .. [[:test(charcode) then
|
||||
ctx:next_function()
|
||||
return ctx:get_function()(ctx)
|
||||
end
|
||||
|
@ -9,7 +9,7 @@ utf8.config.modifier = utf8.config.modifier or {
|
||||
function utf8.regex.compiletime.modifier.parse(regex, c, bs, ctx)
|
||||
for _, m in ipairs(utf8.config.modifier) do
|
||||
local functions, move = m.parse(regex, c, bs, ctx)
|
||||
utf8.debug("mod", _, c, bs, nbs, move, functions and utf8.config.unpack(functions))
|
||||
utf8.debug("mod", _, c, bs, move, functions and utf8.config.unpack(functions))
|
||||
if functions then
|
||||
ctx.prev_class = nil
|
||||
return functions, move
|
||||
|
@ -7,7 +7,7 @@ local matchers = {
|
||||
local ]] .. class_name .. [[ = ]] .. class .. [[
|
||||
|
||||
add(function(ctx) -- simple
|
||||
debug(ctx, 'simple', ']] .. class_name .. [[')
|
||||
-- debug(ctx, 'simple', ']] .. class_name .. [[')
|
||||
if ]] .. class_name .. [[:test(ctx:get_charcode()) then
|
||||
ctx:next_char()
|
||||
ctx:next_function()
|
||||
|
@ -15,7 +15,7 @@ end
|
||||
|
||||
local function check(ctx)
|
||||
if ctx.prev_class then
|
||||
table.insert(ctx.funcs, matchers.simple(ctx.prev_class, tostring(bs)))
|
||||
table.insert(ctx.funcs, matchers.simple(ctx.prev_class, tostring(ctx.pos)))
|
||||
ctx.prev_class = nil
|
||||
end
|
||||
end
|
||||
|
@ -10,18 +10,21 @@ local matchers = {
|
||||
local ]] .. class_name .. [[ = ]] .. class .. [[
|
||||
|
||||
add(function(ctx) -- star
|
||||
debug(ctx, 'star', ']] .. class_name .. [[')
|
||||
local saved = {ctx:clone()}
|
||||
while ]] .. class_name .. [[:test(ctx:get_charcode()) do
|
||||
ctx:next_char()
|
||||
table.insert(saved, ctx:clone())
|
||||
debug('#saved <<', #saved)
|
||||
-- debug(ctx, 'star', ']] .. class_name .. [[')
|
||||
local clone = ctx:clone()
|
||||
while ]] .. class_name .. [[:test(clone:get_charcode()) do
|
||||
clone:next_char()
|
||||
end
|
||||
while #saved > 0 do
|
||||
ctx = table.remove(saved)
|
||||
ctx:next_function()
|
||||
ctx:get_function()(ctx)
|
||||
debug('#saved >>', #saved)
|
||||
local pos = clone.pos
|
||||
while pos >= ctx.pos do
|
||||
clone.pos = pos
|
||||
clone.func_pos = ctx.func_pos
|
||||
clone:next_function()
|
||||
clone:get_function()(clone)
|
||||
if clone.modified then
|
||||
clone = ctx:clone()
|
||||
end
|
||||
pos = pos - 1
|
||||
end
|
||||
end)
|
||||
]]
|
||||
@ -32,15 +35,23 @@ local matchers = {
|
||||
local ]] .. class_name .. [[ = ]] .. class .. [[
|
||||
|
||||
add(function(ctx) -- minus
|
||||
debug(ctx, 'minus', ']] .. class_name .. [[')
|
||||
-- debug(ctx, 'minus', ']] .. class_name .. [[')
|
||||
|
||||
local clone = ctx:clone()
|
||||
local pos
|
||||
repeat
|
||||
local saved = ctx:clone()
|
||||
ctx:next_function()
|
||||
ctx:get_function()(ctx)
|
||||
ctx = saved
|
||||
local match = ]] .. class_name .. [[:test(ctx:get_charcode())
|
||||
ctx:next_char()
|
||||
pos = clone.pos
|
||||
clone:next_function()
|
||||
clone:get_function()(clone)
|
||||
if clone.modified then
|
||||
clone = ctx:clone()
|
||||
clone.pos = pos
|
||||
else
|
||||
clone.pos = pos
|
||||
clone.func_pos = ctx.func_pos
|
||||
end
|
||||
local match = ]] .. class_name .. [[:test(clone:get_charcode())
|
||||
clone:next_char()
|
||||
until not match
|
||||
end)
|
||||
]]
|
||||
@ -51,7 +62,7 @@ local matchers = {
|
||||
local ]] .. class_name .. [[ = ]] .. class .. [[
|
||||
|
||||
add(function(ctx) -- question
|
||||
debug(ctx, 'question', ']] .. class_name .. [[')
|
||||
-- debug(ctx, 'question', ']] .. class_name .. [[')
|
||||
local saved = ctx:clone()
|
||||
if ]] .. class_name .. [[:test(ctx:get_charcode()) then
|
||||
ctx:next_char()
|
||||
@ -67,8 +78,9 @@ local matchers = {
|
||||
capture_start = function(number)
|
||||
return [[
|
||||
add(function(ctx)
|
||||
debug(ctx, 'capture_start', ']] .. tostring(number) .. [[')
|
||||
table.insert(ctx.captures.active, { id = ]] .. tostring(number) .. [[, start_byte = byte_pos, start = ctx.pos })
|
||||
ctx.modified = true
|
||||
-- debug(ctx, 'capture_start', ']] .. tostring(number) .. [[')
|
||||
table.insert(ctx.captures.active, { id = ]] .. tostring(number) .. [[, start = ctx.pos })
|
||||
ctx:next_function()
|
||||
return ctx:get_function()(ctx)
|
||||
end)
|
||||
@ -77,12 +89,34 @@ local matchers = {
|
||||
capture_finish = function(number)
|
||||
return [[
|
||||
add(function(ctx)
|
||||
debug(ctx, 'capture_finish', ']] .. tostring(number) .. [[')
|
||||
ctx.modified = true
|
||||
-- debug(ctx, 'capture_finish', ']] .. tostring(number) .. [[')
|
||||
local cap = table.remove(ctx.captures.active)
|
||||
cap.finish_byte = byte_pos
|
||||
cap.finish = ctx.pos
|
||||
ctx.captures[cap.id] = utf8sub(ctx.str, cap.start, cap.finish - 1)
|
||||
debug('capture#' .. tostring(cap.id), '[' .. tostring(cap.start).. ',' .. tostring(cap.finish) .. ']' , 'is', ctx.captures[cap.id])
|
||||
local b, e = ctx.offsets[cap.start], ctx.offsets[cap.finish]
|
||||
if cap.start < 1 then
|
||||
b = 1
|
||||
elseif cap.start >= ctx.len then
|
||||
b = ctx.rawlen + 1
|
||||
end
|
||||
if cap.finish < 1 then
|
||||
e = 1
|
||||
elseif cap.finish >= ctx.len then
|
||||
e = ctx.rawlen + 1
|
||||
end
|
||||
ctx.captures[cap.id] = rawsub(ctx.str, b, e - 1)
|
||||
-- debug('capture#' .. tostring(cap.id), '[' .. tostring(cap.start).. ',' .. tostring(cap.finish) .. ']' , 'is', ctx.captures[cap.id])
|
||||
ctx:next_function()
|
||||
return ctx:get_function()(ctx)
|
||||
end)
|
||||
]]
|
||||
end,
|
||||
capture_position = function(number)
|
||||
return [[
|
||||
add(function(ctx)
|
||||
ctx.modified = true
|
||||
-- debug(ctx, 'capture_position', ']] .. tostring(number) .. [[')
|
||||
ctx.captures[ ]] .. tostring(number) .. [[ ] = ctx.pos
|
||||
ctx:next_function()
|
||||
return ctx:get_function()(ctx)
|
||||
end)
|
||||
@ -91,11 +125,11 @@ local matchers = {
|
||||
capture = function(number)
|
||||
return [[
|
||||
add(function(ctx)
|
||||
debug(ctx, 'capture', ']] .. tostring(number) .. [[')
|
||||
-- debug(ctx, 'capture', ']] .. tostring(number) .. [[')
|
||||
local cap = ctx.captures[ ]] .. tostring(number) .. [[ ]
|
||||
local len = utf8len(cap)
|
||||
local check = utf8sub(ctx.str, ctx.pos, ctx.pos + len - 1)
|
||||
debug("capture check:", cap, check)
|
||||
-- debug("capture check:", cap, check)
|
||||
if cap == check then
|
||||
ctx.pos = ctx.pos + len
|
||||
ctx:next_function()
|
||||
@ -121,9 +155,9 @@ local matchers = {
|
||||
elseif c == b then
|
||||
balance = balance - 1
|
||||
end
|
||||
debug("balancer: balance=", balance, ", d=", d, ", b=", b, ", charcode=", ctx:get_charcode())
|
||||
-- debug("balancer: balance=", balance, ", d=", d, ", b=", b, ", charcode=", ctx:get_charcode())
|
||||
ctx:next_char()
|
||||
until balance == 0
|
||||
until balance == 0 or (balance == 2 and d == b)
|
||||
ctx:next_function()
|
||||
return ctx:get_function()(ctx)
|
||||
end)
|
||||
@ -139,6 +173,9 @@ local function parse(regex, c, bs, ctx)
|
||||
if c == '%' then
|
||||
c, nbs = next(regex, bs)
|
||||
utf8.debug("next", c, bs)
|
||||
if c == '' then
|
||||
error("malformed pattern (ends with '%')")
|
||||
end
|
||||
if utf8.raw.find('123456789', c, 1, true) then
|
||||
functions = { matchers.capture(tonumber(c)) }
|
||||
nbs = utf8.next(regex, nbs)
|
||||
@ -146,6 +183,7 @@ local function parse(regex, c, bs, ctx)
|
||||
local d, b
|
||||
d, nbs = next(regex, nbs)
|
||||
b, nbs = next(regex, nbs)
|
||||
assert(d ~= '' and b ~= '', "unbalanced pattern")
|
||||
functions = { matchers.balancer({d, b}, tostring(bs)) }
|
||||
nbs = utf8.next(regex, nbs)
|
||||
end
|
||||
@ -191,13 +229,19 @@ local function parse(regex, c, bs, ctx)
|
||||
nbs = bs + 1
|
||||
elseif c == '(' then
|
||||
ctx.capture = ctx.capture or {balance = 0, id = 0}
|
||||
ctx.capture.balance = ctx.capture.balance + 1
|
||||
ctx.capture.id = ctx.capture.id + 1
|
||||
local nc = next(regex, nbs)
|
||||
if nc == ')' then
|
||||
functions = {matchers.capture_position(ctx.capture.id)}
|
||||
nbs = bs + 2
|
||||
else
|
||||
ctx.capture.balance = ctx.capture.balance + 1
|
||||
functions = {matchers.capture_start(ctx.capture.id)}
|
||||
nbs = bs + 1
|
||||
end
|
||||
if ctx.prev_class then
|
||||
table.insert(functions, 1, matchers.simple(ctx.prev_class, tostring(bs)))
|
||||
end
|
||||
nbs = bs + 1
|
||||
elseif c == ')' then
|
||||
ctx.capture = ctx.capture or {balance = 0, id = 0}
|
||||
functions = { matchers.capture_finish(ctx.capture.id) }
|
||||
|
37
loveframes/third-party/utf8/primitives/dummy.lua
vendored
37
loveframes/third-party/utf8/primitives/dummy.lua
vendored
@ -80,8 +80,14 @@ local function utf8symbollen(byte)
|
||||
return not byte and 0 or (byte < 0x80 and 1) or (byte >= 0xF0 and 4) or (byte >= 0xE0 and 3) or (byte >= 0xC0 and 2) or 1
|
||||
end
|
||||
|
||||
local head_table = utf8.config.int32array(256)
|
||||
for i = 0, 255 do
|
||||
head_table[i] = utf8symbollen(i)
|
||||
end
|
||||
head_table[256] = 0
|
||||
|
||||
local function utf8charbytes(str, bs)
|
||||
return utf8symbollen(byte(str, bs))
|
||||
return head_table[byte(str, bs) or 256]
|
||||
end
|
||||
|
||||
local function utf8next(str, bs)
|
||||
@ -201,13 +207,12 @@ utf8unicode = function(str, ibs, jbs)
|
||||
|
||||
bytes = utf8charbytes(str, ibs)
|
||||
if bytes == 0 then return end
|
||||
ch = sub(str,ibs,ibs-1+bytes)
|
||||
|
||||
local unicode
|
||||
|
||||
if bytes == 1 then unicode = byte(ch) end
|
||||
if bytes == 1 then unicode = byte(str, ibs, ibs) end
|
||||
if bytes == 2 then
|
||||
local byte0,byte1 = byte(ch,1,2)
|
||||
local byte0,byte1 = byte(str, ibs, ibs + 1)
|
||||
if byte0 and byte1 then
|
||||
local code0,code1 = byte0-0xC0,byte1-0x80
|
||||
unicode = code0*shift_6 + code1
|
||||
@ -216,7 +221,7 @@ utf8unicode = function(str, ibs, jbs)
|
||||
end
|
||||
end
|
||||
if bytes == 3 then
|
||||
local byte0,byte1,byte2 = byte(ch,1,3)
|
||||
local byte0,byte1,byte2 = byte(str, ibs, ibs + 2)
|
||||
if byte0 and byte1 and byte2 then
|
||||
local code0,code1,code2 = byte0-0xE0,byte1-0x80,byte2-0x80
|
||||
unicode = code0*shift_12 + code1*shift_6 + code2
|
||||
@ -225,7 +230,7 @@ utf8unicode = function(str, ibs, jbs)
|
||||
end
|
||||
end
|
||||
if bytes == 4 then
|
||||
local byte0,byte1,byte2,byte3 = byte(ch,1,4)
|
||||
local byte0,byte1,byte2,byte3 = byte(str, ibs, ibs + 3)
|
||||
if byte0 and byte1 and byte2 and byte3 then
|
||||
local code0,code1,code2,code3 = byte0-0xF0,byte1-0x80,byte2-0x80,byte3-0x80
|
||||
unicode = code0*shift_18 + code1*shift_12 + code2*shift_6 + code3
|
||||
@ -234,8 +239,12 @@ utf8unicode = function(str, ibs, jbs)
|
||||
end
|
||||
end
|
||||
|
||||
if ibs == jbs then
|
||||
return unicode
|
||||
else
|
||||
return unicode,utf8unicode(str, ibs+bytes, jbs)
|
||||
end
|
||||
end
|
||||
|
||||
local function utf8byte(str, i, j)
|
||||
if #str == 0 then return end
|
||||
@ -281,7 +290,7 @@ local function utf8gensub(str, sub_len)
|
||||
return function(skip_ptr, bs)
|
||||
bs = (bs and bs or 1) + (skip_ptr and (skip_ptr[1] or 0) or 0)
|
||||
|
||||
nbs = bs
|
||||
local nbs = bs
|
||||
if bs > max_len then return nil end
|
||||
for i = 1, sub_len do
|
||||
nbs = utf8next(str, nbs)
|
||||
@ -427,7 +436,7 @@ local function utf8offset(str, n, bs)
|
||||
bs = 1
|
||||
end
|
||||
end
|
||||
if bs < 0 or bs > l + 1 then
|
||||
if bs <= 0 or bs > l + 1 then
|
||||
error("bad argument #3 to 'offset' (position out of range)")
|
||||
end
|
||||
|
||||
@ -437,8 +446,8 @@ local function utf8offset(str, n, bs)
|
||||
end
|
||||
while true do
|
||||
local b = byte(str, bs)
|
||||
if 0 < b and b < 127
|
||||
or 194 < b and b < 244 then
|
||||
if (0 < b and b < 127)
|
||||
or (194 < b and b < 244) then
|
||||
return bs
|
||||
end
|
||||
bs = bs - 1
|
||||
@ -454,8 +463,8 @@ local function utf8offset(str, n, bs)
|
||||
end
|
||||
|
||||
local b = byte(str, bs)
|
||||
if 0 < b and b < 127
|
||||
or 194 < b and b < 244 then
|
||||
if (0 < b and b < 127)
|
||||
or (194 < b and b < 244) then
|
||||
n = n + 1
|
||||
end
|
||||
bs = bs - 1
|
||||
@ -468,8 +477,8 @@ local function utf8offset(str, n, bs)
|
||||
end
|
||||
|
||||
local b = byte(str, bs)
|
||||
if 0 < b and b < 127
|
||||
or 194 < b and b < 244 then
|
||||
if (0 < b and b < 127)
|
||||
or (194 < b and b < 244) then
|
||||
n = n - 1
|
||||
for i = 1, n do
|
||||
if bs > l then
|
||||
|
@ -1,12 +1,19 @@
|
||||
return function(utf8)
|
||||
|
||||
os.setlocale(utf8.config.locale, "ctype")
|
||||
|
||||
local ffi = require("ffi")
|
||||
if ffi.os == "Windows" then
|
||||
os.setlocale(utf8.config.locale or "english_us.65001", "ctype")
|
||||
ffi.cdef[[
|
||||
short towupper(short c);
|
||||
short towlower(short c);
|
||||
]]
|
||||
else
|
||||
os.setlocale(utf8.config.locale or "C.UTF-8", "ctype")
|
||||
ffi.cdef[[
|
||||
int towupper(int c);
|
||||
int towlower(int c);
|
||||
]]
|
||||
end
|
||||
|
||||
utf8:require "primitives.dummy"
|
||||
|
||||
|
2
loveframes/third-party/utf8/regex_parser.lua
vendored
2
loveframes/third-party/utf8/regex_parser.lua
vendored
@ -58,7 +58,9 @@ return function(regex, plain)
|
||||
local ctx = utf8:require("context.runtime").new({str = str, pos = init or 1})
|
||||
local cl = utf8:require("charclass.runtime.init")
|
||||
local utf8sub = utf8.sub
|
||||
local rawsub = utf8.raw.sub
|
||||
local utf8len = utf8.len
|
||||
local utf8next = utf8.next
|
||||
local debug = utf8.debug
|
||||
local function add(fun)
|
||||
ctx.functions[#ctx.functions + 1] = fun
|
||||
|
Loading…
Reference in New Issue
Block a user