mirror of
https://github.com/linux-man/LoveFrames.git
synced 2024-11-18 16:04:22 +00:00
utf8 lib update
This commit is contained in:
parent
36e3d5874c
commit
72886439e7
@ -7,7 +7,7 @@ utf8.config.begins = utf8.config.begins or {
|
|||||||
function utf8.regex.compiletime.begins.parse(regex, c, bs, ctx)
|
function utf8.regex.compiletime.begins.parse(regex, c, bs, ctx)
|
||||||
for _, m in ipairs(utf8.config.begins) do
|
for _, m in ipairs(utf8.config.begins) do
|
||||||
local functions, move = m.parse(regex, c, bs, ctx)
|
local functions, move = m.parse(regex, c, bs, ctx)
|
||||||
utf8.debug("begins", _, c, bs, nbs, move, functions)
|
utf8.debug("begins", _, c, bs, move, functions)
|
||||||
if functions then
|
if functions then
|
||||||
return functions, move
|
return functions, move
|
||||||
end
|
end
|
||||||
|
@ -4,26 +4,25 @@ local matchers = {
|
|||||||
sliding = function()
|
sliding = function()
|
||||||
return [[
|
return [[
|
||||||
add(function(ctx) -- sliding
|
add(function(ctx) -- sliding
|
||||||
local saved = ctx:clone()
|
while ctx.pos <= ctx.len do
|
||||||
local start_pos = ctx.pos
|
local clone = ctx:clone()
|
||||||
while ctx.pos <= 1 + utf8len(ctx.str) do
|
-- debug('starting from', clone, "start_pos", clone.pos)
|
||||||
debug('starting from', ctx, "start_pos", start_pos)
|
clone.result.start = clone.pos
|
||||||
ctx.result.start = ctx.pos
|
clone:next_function()
|
||||||
ctx:next_function()
|
clone:get_function()(clone)
|
||||||
ctx:get_function()(ctx)
|
|
||||||
|
|
||||||
ctx = saved:clone()
|
ctx:next_char()
|
||||||
start_pos = start_pos + 1
|
end
|
||||||
ctx.pos = start_pos
|
ctx:terminate()
|
||||||
end
|
|
||||||
ctx:terminate()
|
|
||||||
end)
|
end)
|
||||||
]]
|
]]
|
||||||
end,
|
end,
|
||||||
fromstart = function(ctx)
|
fromstart = function(ctx)
|
||||||
return [[
|
return [[
|
||||||
add(function(ctx) -- fromstart
|
add(function(ctx) -- fromstart
|
||||||
local saved = ctx:clone()
|
if ctx.byte_pos > ctx.len then
|
||||||
|
return
|
||||||
|
end
|
||||||
ctx.result.start = ctx.pos
|
ctx.result.start = ctx.pos
|
||||||
ctx:next_function()
|
ctx:next_function()
|
||||||
ctx:get_function()(ctx)
|
ctx:get_function()(ctx)
|
||||||
|
@ -89,34 +89,38 @@ function builder:include(b)
|
|||||||
end
|
end
|
||||||
|
|
||||||
function builder:build()
|
function builder:build()
|
||||||
local codes_list = table.concat(self.codes or {}, ', ')
|
if self.codes and #self.codes == 1 and not self.inverted and not self.ranges and not self.classes and not self.not_classes and not self.includes then
|
||||||
local ranges_list = ''
|
return "{test = function(self, cc) return cc == " .. self.codes[1] .. " end}"
|
||||||
for i, r in ipairs(self.ranges or {}) do ranges_list = ranges_list .. (i > 1 and ', {' or '{') .. tostring(r[1]) .. ', ' .. tostring(r[2]) .. '}' end
|
else
|
||||||
local classes_list = ''
|
local codes_list = table.concat(self.codes or {}, ', ')
|
||||||
if self.classes then classes_list = "'" .. table.concat(self.classes, "', '") .. "'" end
|
local ranges_list = ''
|
||||||
local not_classes_list = ''
|
for i, r in ipairs(self.ranges or {}) do ranges_list = ranges_list .. (i > 1 and ', {' or '{') .. tostring(r[1]) .. ', ' .. tostring(r[2]) .. '}' end
|
||||||
if self.not_classes then not_classes_list = "'" .. table.concat(self.not_classes, "', '") .. "'" end
|
local classes_list = ''
|
||||||
|
if self.classes then classes_list = "'" .. table.concat(self.classes, "', '") .. "'" end
|
||||||
|
local not_classes_list = ''
|
||||||
|
if self.not_classes then not_classes_list = "'" .. table.concat(self.not_classes, "', '") .. "'" end
|
||||||
|
|
||||||
local subs_list = ''
|
local subs_list = ''
|
||||||
for i, r in ipairs(self.includes or {}) do subs_list = subs_list .. (i > 1 and ', ' or '') .. r:build() .. '' end
|
for i, r in ipairs(self.includes or {}) do subs_list = subs_list .. (i > 1 and ', ' or '') .. r:build() .. '' end
|
||||||
|
|
||||||
local src = [[cl.new():with_codes(
|
local src = [[cl.new():with_codes(
|
||||||
]] .. codes_list .. [[
|
]] .. codes_list .. [[
|
||||||
):with_ranges(
|
):with_ranges(
|
||||||
]] .. ranges_list .. [[
|
]] .. ranges_list .. [[
|
||||||
):with_classes(
|
):with_classes(
|
||||||
]] .. classes_list .. [[
|
]] .. classes_list .. [[
|
||||||
):without_classes(
|
):without_classes(
|
||||||
]] .. not_classes_list .. [[
|
]] .. not_classes_list .. [[
|
||||||
):with_subs(
|
):with_subs(
|
||||||
]] .. subs_list .. [[
|
]] .. subs_list .. [[
|
||||||
)]]
|
)]]
|
||||||
|
|
||||||
if self.inverted then
|
if self.inverted then
|
||||||
src = src .. ':invert()'
|
src = src .. ':invert()'
|
||||||
|
end
|
||||||
|
|
||||||
|
return src
|
||||||
end
|
end
|
||||||
|
|
||||||
return src
|
|
||||||
end
|
end
|
||||||
|
|
||||||
return builder
|
return builder
|
||||||
|
@ -28,7 +28,7 @@ return function(str, c, bs, ctx)
|
|||||||
if c == '%' then
|
if c == '%' then
|
||||||
c, nbs = next(str, nbs)
|
c, nbs = next(str, nbs)
|
||||||
r2 = c
|
r2 = c
|
||||||
elseif c ~= '' then
|
elseif c ~= '' and c ~= ']' then
|
||||||
r2 = c
|
r2 = c
|
||||||
end
|
end
|
||||||
|
|
||||||
|
@ -16,6 +16,9 @@ local function parse(str, c, bs, ctx)
|
|||||||
|
|
||||||
if c == '%' then
|
if c == '%' then
|
||||||
c, nbs = next(str, bs)
|
c, nbs = next(str, bs)
|
||||||
|
if c == '' then
|
||||||
|
error("malformed pattern (ends with '%')")
|
||||||
|
end
|
||||||
local _c = utf8.raw.lower(c)
|
local _c = utf8.raw.lower(c)
|
||||||
local matched
|
local matched
|
||||||
if _c == 'a' then
|
if _c == 'a' then
|
||||||
@ -46,8 +49,15 @@ local function parse(str, c, bs, ctx)
|
|||||||
else
|
else
|
||||||
class = cl.new():with_classes(matched)
|
class = cl.new():with_classes(matched)
|
||||||
end
|
end
|
||||||
|
elseif _c == 'z' then
|
||||||
|
class = cl.new():with_codes(0)
|
||||||
|
if _c ~= c then
|
||||||
|
class = class:invert()
|
||||||
|
end
|
||||||
|
else
|
||||||
|
class = cl.new():with_codes(c)
|
||||||
end
|
end
|
||||||
elseif c == '[' then
|
elseif c == '[' and not ctx.internal then
|
||||||
local old_internal = ctx.internal
|
local old_internal = ctx.internal
|
||||||
ctx.internal = true
|
ctx.internal = true
|
||||||
class = cl.new()
|
class = cl.new()
|
||||||
@ -58,9 +68,18 @@ local function parse(str, c, bs, ctx)
|
|||||||
utf8.debug("next", tttt, c, nbs)
|
utf8.debug("next", tttt, c, nbs)
|
||||||
if c == '^' and firstletter then
|
if c == '^' and firstletter then
|
||||||
class:invert()
|
class:invert()
|
||||||
|
local nc, nnbs = next(str, nbs)
|
||||||
|
if nc == ']' then
|
||||||
|
class:with_codes(nc)
|
||||||
|
nbs = nnbs
|
||||||
|
end
|
||||||
elseif c == ']' then
|
elseif c == ']' then
|
||||||
utf8.debug('] on pos', tttt, nbs)
|
if firstletter then
|
||||||
break
|
class:with_codes(c)
|
||||||
|
else
|
||||||
|
utf8.debug('] on pos', tttt, nbs)
|
||||||
|
break
|
||||||
|
end
|
||||||
elseif c == '' then
|
elseif c == '' then
|
||||||
error "malformed pattern (missing ']')"
|
error "malformed pattern (missing ']')"
|
||||||
else
|
else
|
||||||
|
@ -71,7 +71,7 @@ function class:with_subs(...)
|
|||||||
end
|
end
|
||||||
|
|
||||||
function class:in_codes(item)
|
function class:in_codes(item)
|
||||||
if not self.codes then return false end
|
if not self.codes or #self.codes == 0 then return nil end
|
||||||
|
|
||||||
local head, tail = 1, #self.codes
|
local head, tail = 1, #self.codes
|
||||||
local mid = math.floor((head + tail)/2)
|
local mid = math.floor((head + tail)/2)
|
||||||
@ -93,7 +93,7 @@ function class:in_codes(item)
|
|||||||
end
|
end
|
||||||
|
|
||||||
function class:in_ranges(char_code)
|
function class:in_ranges(char_code)
|
||||||
if not self.ranges then return false end
|
if not self.ranges or #self.ranges == 0 then return nil end
|
||||||
|
|
||||||
for _,r in ipairs(self.ranges) do
|
for _,r in ipairs(self.ranges) do
|
||||||
if r[1] <= char_code and char_code <= r[2] then
|
if r[1] <= char_code and char_code <= r[2] then
|
||||||
@ -104,7 +104,7 @@ function class:in_ranges(char_code)
|
|||||||
end
|
end
|
||||||
|
|
||||||
function class:in_classes(char_code)
|
function class:in_classes(char_code)
|
||||||
if not self.classes then return false end
|
if not self.classes or #self.classes == 0 then return nil end
|
||||||
|
|
||||||
for _, class in ipairs(self.classes) do
|
for _, class in ipairs(self.classes) do
|
||||||
if self:is(class, char_code) then
|
if self:is(class, char_code) then
|
||||||
@ -115,7 +115,7 @@ function class:in_classes(char_code)
|
|||||||
end
|
end
|
||||||
|
|
||||||
function class:in_not_classes(char_code)
|
function class:in_not_classes(char_code)
|
||||||
if not self.not_classes then return false end
|
if not self.not_classes or #self.not_classes == 0 then return nil end
|
||||||
|
|
||||||
for _, class in ipairs(self.not_classes) do
|
for _, class in ipairs(self.not_classes) do
|
||||||
if self:is(class, char_code) then
|
if self:is(class, char_code) then
|
||||||
@ -130,7 +130,7 @@ function class:is(class, char_code)
|
|||||||
end
|
end
|
||||||
|
|
||||||
function class:in_subs(char_code)
|
function class:in_subs(char_code)
|
||||||
if not self.subs or #self.subs == 0 then return false end
|
if not self.subs or #self.subs == 0 then return nil end
|
||||||
|
|
||||||
for _, c in ipairs(self.subs) do
|
for _, c in ipairs(self.subs) do
|
||||||
if not c:test(char_code) then
|
if not c:test(char_code) then
|
||||||
@ -142,20 +142,40 @@ end
|
|||||||
|
|
||||||
function class:test(char_code)
|
function class:test(char_code)
|
||||||
local result = self:do_test(char_code)
|
local result = self:do_test(char_code)
|
||||||
utf8.debug('class:test', result, "'" .. (char_code and utf8.char(char_code) or 'nil') .. "'", char_code)
|
-- utf8.debug('class:test', result, "'" .. (char_code and utf8.char(char_code) or 'nil') .. "'", char_code)
|
||||||
return result
|
return result
|
||||||
end
|
end
|
||||||
|
|
||||||
function class:do_test(char_code)
|
function class:do_test(char_code)
|
||||||
if not char_code then return false end
|
if not char_code then return false end
|
||||||
local found = (self:in_codes(char_code) or self:in_ranges(char_code) or self:in_classes(char_code) or self:in_subs(char_code)) and not self:in_not_classes(char_code)
|
local in_not_classes = self:in_not_classes(char_code)
|
||||||
utf8.debug('class:do_test', 'found', found, 'inverted', self.inverted, 'result', self.inverted and not found or found)
|
if in_not_classes then
|
||||||
-- utf8.debug(self:in_codes(char_code), self:in_ranges(char_code), self:in_classes(char_code), self:in_subs(char_code), not self:in_not_classes(char_code))
|
return not not self.inverted
|
||||||
-- ternary if ideom (self.inverted and not found or found) doesn't work with booleans >_<
|
end
|
||||||
if self.inverted then
|
local in_codes = self:in_codes(char_code)
|
||||||
return not found
|
if in_codes then
|
||||||
|
return not self.inverted
|
||||||
|
end
|
||||||
|
local in_ranges = self:in_ranges(char_code)
|
||||||
|
if in_ranges then
|
||||||
|
return not self.inverted
|
||||||
|
end
|
||||||
|
local in_classes = self:in_classes(char_code)
|
||||||
|
if in_classes then
|
||||||
|
return not self.inverted
|
||||||
|
end
|
||||||
|
local in_subs = self:in_subs(char_code)
|
||||||
|
if in_subs then
|
||||||
|
return not self.inverted
|
||||||
|
end
|
||||||
|
if (in_codes == nil)
|
||||||
|
and (in_ranges == nil)
|
||||||
|
and (in_classes == nil)
|
||||||
|
and (in_subs == nil)
|
||||||
|
and (in_not_classes == false) then
|
||||||
|
return not self.inverted
|
||||||
else
|
else
|
||||||
return found
|
return not not self.inverted
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
50
loveframes/third-party/utf8/context/runtime.lua
vendored
50
loveframes/third-party/utf8/context/runtime.lua
vendored
@ -1,9 +1,14 @@
|
|||||||
return function(utf8)
|
return function(utf8)
|
||||||
|
|
||||||
local utf8unicode = utf8.byte
|
local utf8unicode = utf8.unicode
|
||||||
local utf8sub = utf8.sub
|
local utf8sub = utf8.sub
|
||||||
|
local sub = utf8.raw.sub
|
||||||
|
local byte = utf8.raw.byte
|
||||||
local utf8len = utf8.len
|
local utf8len = utf8.len
|
||||||
|
local utf8next = utf8.next
|
||||||
local rawgsub = utf8.raw.gsub
|
local rawgsub = utf8.raw.gsub
|
||||||
|
local utf8offset = utf8.offset
|
||||||
|
local utf8char = utf8.char
|
||||||
|
|
||||||
local util = utf8.util
|
local util = utf8.util
|
||||||
|
|
||||||
@ -22,16 +27,42 @@ local mt = {
|
|||||||
|
|
||||||
function ctx.new(obj)
|
function ctx.new(obj)
|
||||||
obj = obj or {}
|
obj = obj or {}
|
||||||
return setmetatable({
|
local res = setmetatable({
|
||||||
pos = obj.pos or 1,
|
pos = obj.pos or 1,
|
||||||
str = obj.str or nil,
|
byte_pos = obj.pos or 1,
|
||||||
|
str = assert(obj.str, "str is required"),
|
||||||
|
len = obj.len,
|
||||||
|
rawlen = obj.rawlen,
|
||||||
|
bytes = obj.bytes,
|
||||||
|
offsets = obj.offsets,
|
||||||
starts = obj.starts or nil,
|
starts = obj.starts or nil,
|
||||||
functions = obj.functions or {},
|
functions = obj.functions or {},
|
||||||
func_pos = obj.func_pos or 1,
|
func_pos = obj.func_pos or 1,
|
||||||
ends = obj.ends or nil,
|
ends = obj.ends or nil,
|
||||||
result = obj.result and util.copy(obj.result) or {},
|
result = obj.result and util.copy(obj.result) or {},
|
||||||
captures = obj.captures and util.copy(obj.captures, true) or {active = {}},
|
captures = obj.captures and util.copy(obj.captures, true) or {active = {}},
|
||||||
|
modified = false,
|
||||||
}, mt)
|
}, mt)
|
||||||
|
if not res.bytes then
|
||||||
|
local str = res.str
|
||||||
|
local l = #str
|
||||||
|
local bytes = utf8.config.int32array(l)
|
||||||
|
local offsets = utf8.config.int32array(l)
|
||||||
|
local c, bs, i = nil, 1, 1
|
||||||
|
while bs <= l do
|
||||||
|
bytes[i] = utf8unicode(str, bs, bs)
|
||||||
|
offsets[i] = bs
|
||||||
|
bs = utf8.next(str, bs)
|
||||||
|
i = i + 1
|
||||||
|
end
|
||||||
|
res.bytes = bytes
|
||||||
|
res.offsets = offsets
|
||||||
|
res.byte_pos = res.pos
|
||||||
|
res.len = i
|
||||||
|
res.rawlen = l
|
||||||
|
end
|
||||||
|
|
||||||
|
return res
|
||||||
end
|
end
|
||||||
|
|
||||||
function ctx:clone()
|
function ctx:clone()
|
||||||
@ -40,15 +71,22 @@ end
|
|||||||
|
|
||||||
function ctx:next_char()
|
function ctx:next_char()
|
||||||
self.pos = self.pos + 1
|
self.pos = self.pos + 1
|
||||||
|
self.byte_pos = self.pos
|
||||||
|
end
|
||||||
|
|
||||||
|
function ctx:prev_char()
|
||||||
|
self.pos = self.pos - 1
|
||||||
|
self.byte_pos = self.pos
|
||||||
end
|
end
|
||||||
|
|
||||||
function ctx:get_char()
|
function ctx:get_char()
|
||||||
return utf8sub(self.str, self.pos, self.pos)
|
if self.len <= self.pos then return "" end
|
||||||
|
return utf8char(self.bytes[self.pos])
|
||||||
end
|
end
|
||||||
|
|
||||||
function ctx:get_charcode()
|
function ctx:get_charcode()
|
||||||
if utf8len(self.str) < self.pos then return nil end
|
if self.len <= self.pos then return nil end
|
||||||
return utf8unicode(self:get_char())
|
return self.bytes[self.pos]
|
||||||
end
|
end
|
||||||
|
|
||||||
function ctx:next_function()
|
function ctx:next_function()
|
||||||
|
@ -7,7 +7,7 @@ utf8.config.ends = utf8.config.ends or {
|
|||||||
function utf8.regex.compiletime.ends.parse(regex, c, bs, ctx)
|
function utf8.regex.compiletime.ends.parse(regex, c, bs, ctx)
|
||||||
for _, m in ipairs(utf8.config.ends) do
|
for _, m in ipairs(utf8.config.ends) do
|
||||||
local functions, move = m.parse(regex, c, bs, ctx)
|
local functions, move = m.parse(regex, c, bs, ctx)
|
||||||
utf8.debug("ends", _, c, bs, nbs, move, functions)
|
utf8.debug("ends", _, c, bs, move, functions)
|
||||||
if functions then
|
if functions then
|
||||||
return functions, move
|
return functions, move
|
||||||
end
|
end
|
||||||
|
@ -13,7 +13,8 @@ local matchers = {
|
|||||||
return [[
|
return [[
|
||||||
add(function(ctx) -- toend
|
add(function(ctx) -- toend
|
||||||
ctx.result.finish = ctx.pos - 1
|
ctx.result.finish = ctx.pos - 1
|
||||||
if ctx.pos == #ctx.str + 1 then ctx:done() end
|
ctx.modified = true
|
||||||
|
if ctx.pos == utf8len(ctx.str) + 1 then ctx:done() end
|
||||||
end)
|
end)
|
||||||
]]
|
]]
|
||||||
end,
|
end,
|
||||||
|
43
loveframes/third-party/utf8/functions/lua53.lua
vendored
43
loveframes/third-party/utf8/functions/lua53.lua
vendored
@ -3,7 +3,22 @@ return function(utf8)
|
|||||||
local utf8sub = utf8.sub
|
local utf8sub = utf8.sub
|
||||||
local utf8gensub = utf8.gensub
|
local utf8gensub = utf8.gensub
|
||||||
local unpack = utf8.config.unpack
|
local unpack = utf8.config.unpack
|
||||||
local get_matcher_function = utf8:require 'regex_parser'
|
local generate_matcher_function = utf8:require 'regex_parser'
|
||||||
|
|
||||||
|
function get_matcher_function(regex, plain)
|
||||||
|
local res
|
||||||
|
if utf8.config.cache then
|
||||||
|
res = utf8.config.cache[plain and "plain" or "regex"][regex]
|
||||||
|
end
|
||||||
|
if res then
|
||||||
|
return res
|
||||||
|
end
|
||||||
|
res = generate_matcher_function(regex, plain)
|
||||||
|
if utf8.config.cache then
|
||||||
|
utf8.config.cache[plain and "plain" or "regex"][regex] = res
|
||||||
|
end
|
||||||
|
return res
|
||||||
|
end
|
||||||
|
|
||||||
local function utf8find(str, regex, init, plain)
|
local function utf8find(str, regex, init, plain)
|
||||||
local func = get_matcher_function(regex, plain)
|
local func = get_matcher_function(regex, plain)
|
||||||
@ -19,7 +34,8 @@ local function utf8find(str, regex, init, plain)
|
|||||||
end
|
end
|
||||||
|
|
||||||
local function utf8match(str, regex, init)
|
local function utf8match(str, regex, init)
|
||||||
local func = get_matcher_function(regex, plain, utf8)
|
local func = get_matcher_function(regex, false)
|
||||||
|
init = ((init or 1) < 0) and (utf8.len(str) + init + 1) or init
|
||||||
local ctx, result, captures = func(str, init, utf8)
|
local ctx, result, captures = func(str, init, utf8)
|
||||||
if not ctx then return nil end
|
if not ctx then return nil end
|
||||||
|
|
||||||
@ -34,7 +50,7 @@ end
|
|||||||
|
|
||||||
local function utf8gmatch(str, regex)
|
local function utf8gmatch(str, regex)
|
||||||
regex = (utf8sub(regex,1,1) ~= '^') and regex or '%' .. regex
|
regex = (utf8sub(regex,1,1) ~= '^') and regex or '%' .. regex
|
||||||
local func = get_matcher_function(regex, plain, utf8)
|
local func = get_matcher_function(regex, false)
|
||||||
local ctx, result, captures
|
local ctx, result, captures
|
||||||
local continue_pos = 1
|
local continue_pos = 1
|
||||||
|
|
||||||
@ -71,7 +87,7 @@ local function replace(repl, args)
|
|||||||
else
|
else
|
||||||
num = tonumber(c)
|
num = tonumber(c)
|
||||||
if num then
|
if num then
|
||||||
ret = ret .. args[num]
|
ret = ret .. assert(args[num], "invalid capture index %" .. c)
|
||||||
else
|
else
|
||||||
ret = ret .. c
|
ret = ret .. c
|
||||||
end
|
end
|
||||||
@ -79,13 +95,9 @@ local function replace(repl, args)
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
elseif type(repl) == 'table' then
|
elseif type(repl) == 'table' then
|
||||||
ret = repl[args[1] or args[0]] or ''
|
ret = repl[args[1]] or args[0]
|
||||||
elseif type(repl) == 'function' then
|
elseif type(repl) == 'function' then
|
||||||
if #args > 0 then
|
ret = repl(unpack(args, 1)) or args[0]
|
||||||
ret = repl(unpack(args, 1)) or ''
|
|
||||||
else
|
|
||||||
ret = repl(args[0]) or ''
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
return ret
|
return ret
|
||||||
end
|
end
|
||||||
@ -95,8 +107,7 @@ local function utf8gsub(str, regex, repl, limit)
|
|||||||
local subbed = ''
|
local subbed = ''
|
||||||
local prev_sub_finish = 1
|
local prev_sub_finish = 1
|
||||||
|
|
||||||
regex = (utf8sub(regex,1,1) ~= '^') and regex or '%' .. regex
|
local func = get_matcher_function(regex, false)
|
||||||
local func = get_matcher_function(regex, plain, utf8)
|
|
||||||
local ctx, result, captures
|
local ctx, result, captures
|
||||||
local continue_pos = 1
|
local continue_pos = 1
|
||||||
|
|
||||||
@ -111,7 +122,13 @@ local function utf8gsub(str, regex, repl, limit)
|
|||||||
utf8.debug('captures:', captures)
|
utf8.debug('captures:', captures)
|
||||||
|
|
||||||
continue_pos = math.max(result.finish + 1, result.start + 1)
|
continue_pos = math.max(result.finish + 1, result.start + 1)
|
||||||
local args = {[0] = utf8sub(str, result.start, result.finish), unpack(captures)}
|
local args
|
||||||
|
if #captures > 0 then
|
||||||
|
args = {[0] = utf8sub(str, result.start, result.finish), unpack(captures)}
|
||||||
|
else
|
||||||
|
args = {[0] = utf8sub(str, result.start, result.finish)}
|
||||||
|
args[1] = args[0]
|
||||||
|
end
|
||||||
|
|
||||||
subbed = subbed .. utf8sub(str, prev_sub_finish, result.start - 1)
|
subbed = subbed .. utf8sub(str, prev_sub_finish, result.start - 1)
|
||||||
subbed = subbed .. replace(repl, args)
|
subbed = subbed .. replace(repl, args)
|
||||||
|
12
loveframes/third-party/utf8/init.lua
vendored
12
loveframes/third-party/utf8/init.lua
vendored
@ -1,5 +1,8 @@
|
|||||||
local module_path = ...
|
local module_path = ...
|
||||||
module_path = module_path:match("^(.-)init$") or (module_path .. '.')
|
module_path = module_path:match("^(.-)init$") or (module_path .. '.')
|
||||||
|
|
||||||
|
local ffi_enabled, ffi = pcall(require, 'ffi')
|
||||||
|
|
||||||
local utf8 = {
|
local utf8 = {
|
||||||
config = {},
|
config = {},
|
||||||
default = {
|
default = {
|
||||||
@ -15,7 +18,14 @@ local utf8 = {
|
|||||||
__mode = 'kv'
|
__mode = 'kv'
|
||||||
}),
|
}),
|
||||||
},
|
},
|
||||||
locale = "C.UTF-8",
|
locale = nil,
|
||||||
|
int32array = function(size)
|
||||||
|
if ffi_enabled then
|
||||||
|
return ffi.new("uint32_t[?]", size + 1)
|
||||||
|
else
|
||||||
|
return {}
|
||||||
|
end
|
||||||
|
end
|
||||||
},
|
},
|
||||||
regex = {
|
regex = {
|
||||||
compiletime = {
|
compiletime = {
|
||||||
|
@ -7,12 +7,13 @@ local matchers = {
|
|||||||
local ]] .. class_name .. [[ = ]] .. class .. [[
|
local ]] .. class_name .. [[ = ]] .. class .. [[
|
||||||
|
|
||||||
add(function(ctx) -- frontier
|
add(function(ctx) -- frontier
|
||||||
ctx.pos = ctx.pos - 1
|
ctx:prev_char()
|
||||||
local prev_charcode = ctx:get_charcode()
|
local prev_charcode = ctx:get_charcode() or 0
|
||||||
ctx:next_char()
|
ctx:next_char()
|
||||||
debug("frontier pos", ctx.pos, "prev_charcode", prev_charcode, "charcode", ctx:get_charcode())
|
local charcode = ctx:get_charcode() or 0
|
||||||
|
-- debug("frontier pos", ctx.pos, "prev_charcode", prev_charcode, "charcode", charcode)
|
||||||
if ]] .. class_name .. [[:test(prev_charcode) then return end
|
if ]] .. class_name .. [[:test(prev_charcode) then return end
|
||||||
if ]] .. class_name .. [[:test(ctx:get_charcode()) then
|
if ]] .. class_name .. [[:test(charcode) then
|
||||||
ctx:next_function()
|
ctx:next_function()
|
||||||
return ctx:get_function()(ctx)
|
return ctx:get_function()(ctx)
|
||||||
end
|
end
|
||||||
|
@ -9,7 +9,7 @@ utf8.config.modifier = utf8.config.modifier or {
|
|||||||
function utf8.regex.compiletime.modifier.parse(regex, c, bs, ctx)
|
function utf8.regex.compiletime.modifier.parse(regex, c, bs, ctx)
|
||||||
for _, m in ipairs(utf8.config.modifier) do
|
for _, m in ipairs(utf8.config.modifier) do
|
||||||
local functions, move = m.parse(regex, c, bs, ctx)
|
local functions, move = m.parse(regex, c, bs, ctx)
|
||||||
utf8.debug("mod", _, c, bs, nbs, move, functions and utf8.config.unpack(functions))
|
utf8.debug("mod", _, c, bs, move, functions and utf8.config.unpack(functions))
|
||||||
if functions then
|
if functions then
|
||||||
ctx.prev_class = nil
|
ctx.prev_class = nil
|
||||||
return functions, move
|
return functions, move
|
||||||
|
@ -7,7 +7,7 @@ local matchers = {
|
|||||||
local ]] .. class_name .. [[ = ]] .. class .. [[
|
local ]] .. class_name .. [[ = ]] .. class .. [[
|
||||||
|
|
||||||
add(function(ctx) -- simple
|
add(function(ctx) -- simple
|
||||||
debug(ctx, 'simple', ']] .. class_name .. [[')
|
-- debug(ctx, 'simple', ']] .. class_name .. [[')
|
||||||
if ]] .. class_name .. [[:test(ctx:get_charcode()) then
|
if ]] .. class_name .. [[:test(ctx:get_charcode()) then
|
||||||
ctx:next_char()
|
ctx:next_char()
|
||||||
ctx:next_function()
|
ctx:next_function()
|
||||||
|
@ -15,7 +15,7 @@ end
|
|||||||
|
|
||||||
local function check(ctx)
|
local function check(ctx)
|
||||||
if ctx.prev_class then
|
if ctx.prev_class then
|
||||||
table.insert(ctx.funcs, matchers.simple(ctx.prev_class, tostring(bs)))
|
table.insert(ctx.funcs, matchers.simple(ctx.prev_class, tostring(ctx.pos)))
|
||||||
ctx.prev_class = nil
|
ctx.prev_class = nil
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
@ -10,18 +10,21 @@ local matchers = {
|
|||||||
local ]] .. class_name .. [[ = ]] .. class .. [[
|
local ]] .. class_name .. [[ = ]] .. class .. [[
|
||||||
|
|
||||||
add(function(ctx) -- star
|
add(function(ctx) -- star
|
||||||
debug(ctx, 'star', ']] .. class_name .. [[')
|
-- debug(ctx, 'star', ']] .. class_name .. [[')
|
||||||
local saved = {ctx:clone()}
|
local clone = ctx:clone()
|
||||||
while ]] .. class_name .. [[:test(ctx:get_charcode()) do
|
while ]] .. class_name .. [[:test(clone:get_charcode()) do
|
||||||
ctx:next_char()
|
clone:next_char()
|
||||||
table.insert(saved, ctx:clone())
|
|
||||||
debug('#saved <<', #saved)
|
|
||||||
end
|
end
|
||||||
while #saved > 0 do
|
local pos = clone.pos
|
||||||
ctx = table.remove(saved)
|
while pos >= ctx.pos do
|
||||||
ctx:next_function()
|
clone.pos = pos
|
||||||
ctx:get_function()(ctx)
|
clone.func_pos = ctx.func_pos
|
||||||
debug('#saved >>', #saved)
|
clone:next_function()
|
||||||
|
clone:get_function()(clone)
|
||||||
|
if clone.modified then
|
||||||
|
clone = ctx:clone()
|
||||||
|
end
|
||||||
|
pos = pos - 1
|
||||||
end
|
end
|
||||||
end)
|
end)
|
||||||
]]
|
]]
|
||||||
@ -32,15 +35,23 @@ local matchers = {
|
|||||||
local ]] .. class_name .. [[ = ]] .. class .. [[
|
local ]] .. class_name .. [[ = ]] .. class .. [[
|
||||||
|
|
||||||
add(function(ctx) -- minus
|
add(function(ctx) -- minus
|
||||||
debug(ctx, 'minus', ']] .. class_name .. [[')
|
-- debug(ctx, 'minus', ']] .. class_name .. [[')
|
||||||
|
|
||||||
|
local clone = ctx:clone()
|
||||||
|
local pos
|
||||||
repeat
|
repeat
|
||||||
local saved = ctx:clone()
|
pos = clone.pos
|
||||||
ctx:next_function()
|
clone:next_function()
|
||||||
ctx:get_function()(ctx)
|
clone:get_function()(clone)
|
||||||
ctx = saved
|
if clone.modified then
|
||||||
local match = ]] .. class_name .. [[:test(ctx:get_charcode())
|
clone = ctx:clone()
|
||||||
ctx:next_char()
|
clone.pos = pos
|
||||||
|
else
|
||||||
|
clone.pos = pos
|
||||||
|
clone.func_pos = ctx.func_pos
|
||||||
|
end
|
||||||
|
local match = ]] .. class_name .. [[:test(clone:get_charcode())
|
||||||
|
clone:next_char()
|
||||||
until not match
|
until not match
|
||||||
end)
|
end)
|
||||||
]]
|
]]
|
||||||
@ -51,7 +62,7 @@ local matchers = {
|
|||||||
local ]] .. class_name .. [[ = ]] .. class .. [[
|
local ]] .. class_name .. [[ = ]] .. class .. [[
|
||||||
|
|
||||||
add(function(ctx) -- question
|
add(function(ctx) -- question
|
||||||
debug(ctx, 'question', ']] .. class_name .. [[')
|
-- debug(ctx, 'question', ']] .. class_name .. [[')
|
||||||
local saved = ctx:clone()
|
local saved = ctx:clone()
|
||||||
if ]] .. class_name .. [[:test(ctx:get_charcode()) then
|
if ]] .. class_name .. [[:test(ctx:get_charcode()) then
|
||||||
ctx:next_char()
|
ctx:next_char()
|
||||||
@ -67,8 +78,9 @@ local matchers = {
|
|||||||
capture_start = function(number)
|
capture_start = function(number)
|
||||||
return [[
|
return [[
|
||||||
add(function(ctx)
|
add(function(ctx)
|
||||||
debug(ctx, 'capture_start', ']] .. tostring(number) .. [[')
|
ctx.modified = true
|
||||||
table.insert(ctx.captures.active, { id = ]] .. tostring(number) .. [[, start_byte = byte_pos, start = ctx.pos })
|
-- debug(ctx, 'capture_start', ']] .. tostring(number) .. [[')
|
||||||
|
table.insert(ctx.captures.active, { id = ]] .. tostring(number) .. [[, start = ctx.pos })
|
||||||
ctx:next_function()
|
ctx:next_function()
|
||||||
return ctx:get_function()(ctx)
|
return ctx:get_function()(ctx)
|
||||||
end)
|
end)
|
||||||
@ -77,12 +89,34 @@ local matchers = {
|
|||||||
capture_finish = function(number)
|
capture_finish = function(number)
|
||||||
return [[
|
return [[
|
||||||
add(function(ctx)
|
add(function(ctx)
|
||||||
debug(ctx, 'capture_finish', ']] .. tostring(number) .. [[')
|
ctx.modified = true
|
||||||
|
-- debug(ctx, 'capture_finish', ']] .. tostring(number) .. [[')
|
||||||
local cap = table.remove(ctx.captures.active)
|
local cap = table.remove(ctx.captures.active)
|
||||||
cap.finish_byte = byte_pos
|
|
||||||
cap.finish = ctx.pos
|
cap.finish = ctx.pos
|
||||||
ctx.captures[cap.id] = utf8sub(ctx.str, cap.start, cap.finish - 1)
|
local b, e = ctx.offsets[cap.start], ctx.offsets[cap.finish]
|
||||||
debug('capture#' .. tostring(cap.id), '[' .. tostring(cap.start).. ',' .. tostring(cap.finish) .. ']' , 'is', ctx.captures[cap.id])
|
if cap.start < 1 then
|
||||||
|
b = 1
|
||||||
|
elseif cap.start >= ctx.len then
|
||||||
|
b = ctx.rawlen + 1
|
||||||
|
end
|
||||||
|
if cap.finish < 1 then
|
||||||
|
e = 1
|
||||||
|
elseif cap.finish >= ctx.len then
|
||||||
|
e = ctx.rawlen + 1
|
||||||
|
end
|
||||||
|
ctx.captures[cap.id] = rawsub(ctx.str, b, e - 1)
|
||||||
|
-- debug('capture#' .. tostring(cap.id), '[' .. tostring(cap.start).. ',' .. tostring(cap.finish) .. ']' , 'is', ctx.captures[cap.id])
|
||||||
|
ctx:next_function()
|
||||||
|
return ctx:get_function()(ctx)
|
||||||
|
end)
|
||||||
|
]]
|
||||||
|
end,
|
||||||
|
capture_position = function(number)
|
||||||
|
return [[
|
||||||
|
add(function(ctx)
|
||||||
|
ctx.modified = true
|
||||||
|
-- debug(ctx, 'capture_position', ']] .. tostring(number) .. [[')
|
||||||
|
ctx.captures[ ]] .. tostring(number) .. [[ ] = ctx.pos
|
||||||
ctx:next_function()
|
ctx:next_function()
|
||||||
return ctx:get_function()(ctx)
|
return ctx:get_function()(ctx)
|
||||||
end)
|
end)
|
||||||
@ -91,11 +125,11 @@ local matchers = {
|
|||||||
capture = function(number)
|
capture = function(number)
|
||||||
return [[
|
return [[
|
||||||
add(function(ctx)
|
add(function(ctx)
|
||||||
debug(ctx, 'capture', ']] .. tostring(number) .. [[')
|
-- debug(ctx, 'capture', ']] .. tostring(number) .. [[')
|
||||||
local cap = ctx.captures[ ]] .. tostring(number) .. [[ ]
|
local cap = ctx.captures[ ]] .. tostring(number) .. [[ ]
|
||||||
local len = utf8len(cap)
|
local len = utf8len(cap)
|
||||||
local check = utf8sub(ctx.str, ctx.pos, ctx.pos + len - 1)
|
local check = utf8sub(ctx.str, ctx.pos, ctx.pos + len - 1)
|
||||||
debug("capture check:", cap, check)
|
-- debug("capture check:", cap, check)
|
||||||
if cap == check then
|
if cap == check then
|
||||||
ctx.pos = ctx.pos + len
|
ctx.pos = ctx.pos + len
|
||||||
ctx:next_function()
|
ctx:next_function()
|
||||||
@ -121,9 +155,9 @@ local matchers = {
|
|||||||
elseif c == b then
|
elseif c == b then
|
||||||
balance = balance - 1
|
balance = balance - 1
|
||||||
end
|
end
|
||||||
debug("balancer: balance=", balance, ", d=", d, ", b=", b, ", charcode=", ctx:get_charcode())
|
-- debug("balancer: balance=", balance, ", d=", d, ", b=", b, ", charcode=", ctx:get_charcode())
|
||||||
ctx:next_char()
|
ctx:next_char()
|
||||||
until balance == 0
|
until balance == 0 or (balance == 2 and d == b)
|
||||||
ctx:next_function()
|
ctx:next_function()
|
||||||
return ctx:get_function()(ctx)
|
return ctx:get_function()(ctx)
|
||||||
end)
|
end)
|
||||||
@ -139,6 +173,9 @@ local function parse(regex, c, bs, ctx)
|
|||||||
if c == '%' then
|
if c == '%' then
|
||||||
c, nbs = next(regex, bs)
|
c, nbs = next(regex, bs)
|
||||||
utf8.debug("next", c, bs)
|
utf8.debug("next", c, bs)
|
||||||
|
if c == '' then
|
||||||
|
error("malformed pattern (ends with '%')")
|
||||||
|
end
|
||||||
if utf8.raw.find('123456789', c, 1, true) then
|
if utf8.raw.find('123456789', c, 1, true) then
|
||||||
functions = { matchers.capture(tonumber(c)) }
|
functions = { matchers.capture(tonumber(c)) }
|
||||||
nbs = utf8.next(regex, nbs)
|
nbs = utf8.next(regex, nbs)
|
||||||
@ -146,6 +183,7 @@ local function parse(regex, c, bs, ctx)
|
|||||||
local d, b
|
local d, b
|
||||||
d, nbs = next(regex, nbs)
|
d, nbs = next(regex, nbs)
|
||||||
b, nbs = next(regex, nbs)
|
b, nbs = next(regex, nbs)
|
||||||
|
assert(d ~= '' and b ~= '', "unbalanced pattern")
|
||||||
functions = { matchers.balancer({d, b}, tostring(bs)) }
|
functions = { matchers.balancer({d, b}, tostring(bs)) }
|
||||||
nbs = utf8.next(regex, nbs)
|
nbs = utf8.next(regex, nbs)
|
||||||
end
|
end
|
||||||
@ -191,13 +229,19 @@ local function parse(regex, c, bs, ctx)
|
|||||||
nbs = bs + 1
|
nbs = bs + 1
|
||||||
elseif c == '(' then
|
elseif c == '(' then
|
||||||
ctx.capture = ctx.capture or {balance = 0, id = 0}
|
ctx.capture = ctx.capture or {balance = 0, id = 0}
|
||||||
ctx.capture.balance = ctx.capture.balance + 1
|
|
||||||
ctx.capture.id = ctx.capture.id + 1
|
ctx.capture.id = ctx.capture.id + 1
|
||||||
functions = { matchers.capture_start(ctx.capture.id) }
|
local nc = next(regex, nbs)
|
||||||
|
if nc == ')' then
|
||||||
|
functions = {matchers.capture_position(ctx.capture.id)}
|
||||||
|
nbs = bs + 2
|
||||||
|
else
|
||||||
|
ctx.capture.balance = ctx.capture.balance + 1
|
||||||
|
functions = {matchers.capture_start(ctx.capture.id)}
|
||||||
|
nbs = bs + 1
|
||||||
|
end
|
||||||
if ctx.prev_class then
|
if ctx.prev_class then
|
||||||
table.insert(functions, 1, matchers.simple(ctx.prev_class, tostring(bs)))
|
table.insert(functions, 1, matchers.simple(ctx.prev_class, tostring(bs)))
|
||||||
end
|
end
|
||||||
nbs = bs + 1
|
|
||||||
elseif c == ')' then
|
elseif c == ')' then
|
||||||
ctx.capture = ctx.capture or {balance = 0, id = 0}
|
ctx.capture = ctx.capture or {balance = 0, id = 0}
|
||||||
functions = { matchers.capture_finish(ctx.capture.id) }
|
functions = { matchers.capture_finish(ctx.capture.id) }
|
||||||
|
39
loveframes/third-party/utf8/primitives/dummy.lua
vendored
39
loveframes/third-party/utf8/primitives/dummy.lua
vendored
@ -80,8 +80,14 @@ local function utf8symbollen(byte)
|
|||||||
return not byte and 0 or (byte < 0x80 and 1) or (byte >= 0xF0 and 4) or (byte >= 0xE0 and 3) or (byte >= 0xC0 and 2) or 1
|
return not byte and 0 or (byte < 0x80 and 1) or (byte >= 0xF0 and 4) or (byte >= 0xE0 and 3) or (byte >= 0xC0 and 2) or 1
|
||||||
end
|
end
|
||||||
|
|
||||||
|
local head_table = utf8.config.int32array(256)
|
||||||
|
for i = 0, 255 do
|
||||||
|
head_table[i] = utf8symbollen(i)
|
||||||
|
end
|
||||||
|
head_table[256] = 0
|
||||||
|
|
||||||
local function utf8charbytes(str, bs)
|
local function utf8charbytes(str, bs)
|
||||||
return utf8symbollen(byte(str, bs))
|
return head_table[byte(str, bs) or 256]
|
||||||
end
|
end
|
||||||
|
|
||||||
local function utf8next(str, bs)
|
local function utf8next(str, bs)
|
||||||
@ -201,13 +207,12 @@ utf8unicode = function(str, ibs, jbs)
|
|||||||
|
|
||||||
bytes = utf8charbytes(str, ibs)
|
bytes = utf8charbytes(str, ibs)
|
||||||
if bytes == 0 then return end
|
if bytes == 0 then return end
|
||||||
ch = sub(str,ibs,ibs-1+bytes)
|
|
||||||
|
|
||||||
local unicode
|
local unicode
|
||||||
|
|
||||||
if bytes == 1 then unicode = byte(ch) end
|
if bytes == 1 then unicode = byte(str, ibs, ibs) end
|
||||||
if bytes == 2 then
|
if bytes == 2 then
|
||||||
local byte0,byte1 = byte(ch,1,2)
|
local byte0,byte1 = byte(str, ibs, ibs + 1)
|
||||||
if byte0 and byte1 then
|
if byte0 and byte1 then
|
||||||
local code0,code1 = byte0-0xC0,byte1-0x80
|
local code0,code1 = byte0-0xC0,byte1-0x80
|
||||||
unicode = code0*shift_6 + code1
|
unicode = code0*shift_6 + code1
|
||||||
@ -216,7 +221,7 @@ utf8unicode = function(str, ibs, jbs)
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
if bytes == 3 then
|
if bytes == 3 then
|
||||||
local byte0,byte1,byte2 = byte(ch,1,3)
|
local byte0,byte1,byte2 = byte(str, ibs, ibs + 2)
|
||||||
if byte0 and byte1 and byte2 then
|
if byte0 and byte1 and byte2 then
|
||||||
local code0,code1,code2 = byte0-0xE0,byte1-0x80,byte2-0x80
|
local code0,code1,code2 = byte0-0xE0,byte1-0x80,byte2-0x80
|
||||||
unicode = code0*shift_12 + code1*shift_6 + code2
|
unicode = code0*shift_12 + code1*shift_6 + code2
|
||||||
@ -225,7 +230,7 @@ utf8unicode = function(str, ibs, jbs)
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
if bytes == 4 then
|
if bytes == 4 then
|
||||||
local byte0,byte1,byte2,byte3 = byte(ch,1,4)
|
local byte0,byte1,byte2,byte3 = byte(str, ibs, ibs + 3)
|
||||||
if byte0 and byte1 and byte2 and byte3 then
|
if byte0 and byte1 and byte2 and byte3 then
|
||||||
local code0,code1,code2,code3 = byte0-0xF0,byte1-0x80,byte2-0x80,byte3-0x80
|
local code0,code1,code2,code3 = byte0-0xF0,byte1-0x80,byte2-0x80,byte3-0x80
|
||||||
unicode = code0*shift_18 + code1*shift_12 + code2*shift_6 + code3
|
unicode = code0*shift_18 + code1*shift_12 + code2*shift_6 + code3
|
||||||
@ -234,7 +239,11 @@ utf8unicode = function(str, ibs, jbs)
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
return unicode,utf8unicode(str, ibs+bytes, jbs)
|
if ibs == jbs then
|
||||||
|
return unicode
|
||||||
|
else
|
||||||
|
return unicode,utf8unicode(str, ibs+bytes, jbs)
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
local function utf8byte(str, i, j)
|
local function utf8byte(str, i, j)
|
||||||
@ -281,7 +290,7 @@ local function utf8gensub(str, sub_len)
|
|||||||
return function(skip_ptr, bs)
|
return function(skip_ptr, bs)
|
||||||
bs = (bs and bs or 1) + (skip_ptr and (skip_ptr[1] or 0) or 0)
|
bs = (bs and bs or 1) + (skip_ptr and (skip_ptr[1] or 0) or 0)
|
||||||
|
|
||||||
nbs = bs
|
local nbs = bs
|
||||||
if bs > max_len then return nil end
|
if bs > max_len then return nil end
|
||||||
for i = 1, sub_len do
|
for i = 1, sub_len do
|
||||||
nbs = utf8next(str, nbs)
|
nbs = utf8next(str, nbs)
|
||||||
@ -427,7 +436,7 @@ local function utf8offset(str, n, bs)
|
|||||||
bs = 1
|
bs = 1
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
if bs < 0 or bs > l + 1 then
|
if bs <= 0 or bs > l + 1 then
|
||||||
error("bad argument #3 to 'offset' (position out of range)")
|
error("bad argument #3 to 'offset' (position out of range)")
|
||||||
end
|
end
|
||||||
|
|
||||||
@ -437,8 +446,8 @@ local function utf8offset(str, n, bs)
|
|||||||
end
|
end
|
||||||
while true do
|
while true do
|
||||||
local b = byte(str, bs)
|
local b = byte(str, bs)
|
||||||
if 0 < b and b < 127
|
if (0 < b and b < 127)
|
||||||
or 194 < b and b < 244 then
|
or (194 < b and b < 244) then
|
||||||
return bs
|
return bs
|
||||||
end
|
end
|
||||||
bs = bs - 1
|
bs = bs - 1
|
||||||
@ -454,8 +463,8 @@ local function utf8offset(str, n, bs)
|
|||||||
end
|
end
|
||||||
|
|
||||||
local b = byte(str, bs)
|
local b = byte(str, bs)
|
||||||
if 0 < b and b < 127
|
if (0 < b and b < 127)
|
||||||
or 194 < b and b < 244 then
|
or (194 < b and b < 244) then
|
||||||
n = n + 1
|
n = n + 1
|
||||||
end
|
end
|
||||||
bs = bs - 1
|
bs = bs - 1
|
||||||
@ -468,8 +477,8 @@ local function utf8offset(str, n, bs)
|
|||||||
end
|
end
|
||||||
|
|
||||||
local b = byte(str, bs)
|
local b = byte(str, bs)
|
||||||
if 0 < b and b < 127
|
if (0 < b and b < 127)
|
||||||
or 194 < b and b < 244 then
|
or (194 < b and b < 244) then
|
||||||
n = n - 1
|
n = n - 1
|
||||||
for i = 1, n do
|
for i = 1, n do
|
||||||
if bs > l then
|
if bs > l then
|
||||||
|
@ -1,12 +1,19 @@
|
|||||||
return function(utf8)
|
return function(utf8)
|
||||||
|
|
||||||
os.setlocale(utf8.config.locale, "ctype")
|
local ffi = require("ffi")
|
||||||
|
if ffi.os == "Windows" then
|
||||||
local ffi = require("ffi")
|
os.setlocale(utf8.config.locale or "english_us.65001", "ctype")
|
||||||
ffi.cdef[[
|
ffi.cdef[[
|
||||||
int towupper(int c);
|
short towupper(short c);
|
||||||
int towlower(int c);
|
short towlower(short c);
|
||||||
]]
|
]]
|
||||||
|
else
|
||||||
|
os.setlocale(utf8.config.locale or "C.UTF-8", "ctype")
|
||||||
|
ffi.cdef[[
|
||||||
|
int towupper(int c);
|
||||||
|
int towlower(int c);
|
||||||
|
]]
|
||||||
|
end
|
||||||
|
|
||||||
utf8:require "primitives.dummy"
|
utf8:require "primitives.dummy"
|
||||||
|
|
||||||
|
2
loveframes/third-party/utf8/regex_parser.lua
vendored
2
loveframes/third-party/utf8/regex_parser.lua
vendored
@ -58,7 +58,9 @@ return function(regex, plain)
|
|||||||
local ctx = utf8:require("context.runtime").new({str = str, pos = init or 1})
|
local ctx = utf8:require("context.runtime").new({str = str, pos = init or 1})
|
||||||
local cl = utf8:require("charclass.runtime.init")
|
local cl = utf8:require("charclass.runtime.init")
|
||||||
local utf8sub = utf8.sub
|
local utf8sub = utf8.sub
|
||||||
|
local rawsub = utf8.raw.sub
|
||||||
local utf8len = utf8.len
|
local utf8len = utf8.len
|
||||||
|
local utf8next = utf8.next
|
||||||
local debug = utf8.debug
|
local debug = utf8.debug
|
||||||
local function add(fun)
|
local function add(fun)
|
||||||
ctx.functions[#ctx.functions + 1] = fun
|
ctx.functions[#ctx.functions + 1] = fun
|
||||||
|
Loading…
Reference in New Issue
Block a user