mirror of
https://github.com/msva/lua-htmlparser.git
synced 2024-11-27 12:44:22 +00:00
reworking tpl detection; fixes #50
This commit is contained in:
parent
404de0b9be
commit
2e2f306e7f
@ -1,4 +1,4 @@
|
|||||||
-- vim: ft=lua ts=2
|
-- vim: ft=lua ts=2 sw=2
|
||||||
|
|
||||||
local esc = function(s) return string.gsub(s, "([%^%$%(%)%%%.%[%]%*%+%-%?])", "%%" .. "%1") end
|
local esc = function(s) return string.gsub(s, "([%^%$%(%)%%%.%[%]%*%+%-%?])", "%%" .. "%1") end
|
||||||
local str = tostring
|
local str = tostring
|
||||||
@ -11,16 +11,13 @@ local voidelements = require("htmlparser.voidelements")
|
|||||||
|
|
||||||
local HtmlParser = {}
|
local HtmlParser = {}
|
||||||
|
|
||||||
local tpl_rep={
|
local tpr = {
|
||||||
-- Replace table for template engines syntax that can confuse us.
|
|
||||||
-- Here we're replacing confusing sequences
|
-- Here we're replacing confusing sequences
|
||||||
-- (things looking like tags, but appearing where tags can't)
|
-- (things looking like tags, but appearing where tags can't)
|
||||||
-- with a definitely invalid UTF-8 sequence, and later we'll replace them back
|
-- with a definitely invalid UTF-8 sequence, and later we'll replace them back
|
||||||
["<%"] = char(208,209),
|
["<"] = char(208,209,208,209),
|
||||||
["%>"] = char(209,208),
|
[">"] = char(209,208,209,208),
|
||||||
}
|
}
|
||||||
local tpl_rep_rev = {}
|
|
||||||
|
|
||||||
|
|
||||||
local function parse(text,limit)
|
local function parse(text,limit)
|
||||||
local text=str(text)
|
local text=str(text)
|
||||||
@ -28,15 +25,38 @@ local function parse(text,limit)
|
|||||||
local limit = limit or htmlparser_looplimit or 1000
|
local limit = limit or htmlparser_looplimit or 1000
|
||||||
|
|
||||||
local tpl = false
|
local tpl = false
|
||||||
for k,v in pairs(tpl_rep) do
|
|
||||||
local mtc="("..esc(k)..")"
|
local function g(id,...)
|
||||||
if text:match(mtc) then
|
local arg={...}
|
||||||
|
arg[id]=tpr[arg[id]]
|
||||||
tpl=true
|
tpl=true
|
||||||
text=text:gsub(mtc,tpl_rep)
|
return table.concat(arg)
|
||||||
tpl_rep_rev[v]=k;
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
text = text
|
||||||
|
:gsub(
|
||||||
|
"(<)"..
|
||||||
|
"([^>]-)"..
|
||||||
|
"(<)",
|
||||||
|
function(...)return g(3,...)end
|
||||||
|
):gsub(
|
||||||
|
"("..tpr["<"]..")"..
|
||||||
|
"([^%w%s])"..
|
||||||
|
"([^%2]-)"..
|
||||||
|
"(%2)"..
|
||||||
|
"(>)"..
|
||||||
|
"([^>]-)"..
|
||||||
|
"(>)",
|
||||||
|
function(...)return g(5,...)end
|
||||||
|
):gsub(
|
||||||
|
[=[(['"])]=]..
|
||||||
|
[=[([^'>"]-)]=]..
|
||||||
|
"(>)"..
|
||||||
|
[=[([^'>"]-)]=]..
|
||||||
|
[=[(['"])]=],
|
||||||
|
function(...)return g(3,...)end
|
||||||
|
)
|
||||||
|
|
||||||
local index = 0
|
local index = 0
|
||||||
local root = ElementNode:new(index, str(text))
|
local root = ElementNode:new(index, str(text))
|
||||||
|
|
||||||
@ -80,7 +100,7 @@ local function parse(text,limit)
|
|||||||
if not k or k == "/>" or k == ">" then break end
|
if not k or k == "/>" or k == ">" then break end
|
||||||
|
|
||||||
if eq == "=" then
|
if eq == "=" then
|
||||||
local pattern = "=([^%s>]*)"
|
pattern = "=([^%s>]*)"
|
||||||
if quote ~= "" then
|
if quote ~= "" then
|
||||||
pattern = quote .. "([^" .. quote .. "]*)" .. quote
|
pattern = quote .. "([^" .. quote .. "]*)" .. quote
|
||||||
end
|
end
|
||||||
@ -90,11 +110,8 @@ local function parse(text,limit)
|
|||||||
v=v or ""
|
v=v or ""
|
||||||
|
|
||||||
if tpl then
|
if tpl then
|
||||||
for rk,rv in pairs(tpl_rep_rev) do
|
for rk,rv in pairs(tpr) do
|
||||||
local mtc="("..esc(rk)..")"
|
v = v:gsub(rv,rk)
|
||||||
if text:match(mtc) then
|
|
||||||
v = v:gsub(mtc,tpl_rep_rev)
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
@ -133,11 +150,8 @@ local function parse(text,limit)
|
|||||||
end
|
end
|
||||||
|
|
||||||
if tpl then
|
if tpl then
|
||||||
for k,v in pairs(tpl_rep_rev) do
|
for k,v in pairs(tpr) do
|
||||||
local mtc="("..esc(k)..")"
|
root._text = root._text:gsub(v,k)
|
||||||
if text:match(mtc) then
|
|
||||||
root._text = root._text:gsub(mtc,tpl_rep_rev)
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
14
tst/init.lua
14
tst/init.lua
@ -1,3 +1,4 @@
|
|||||||
|
-- vim: ft=lua ts=2 sw=2
|
||||||
-- Omit next line in actual module clients; it's only to support development of the module itself
|
-- Omit next line in actual module clients; it's only to support development of the module itself
|
||||||
package.path = "../src/?.lua;" .. package.path
|
package.path = "../src/?.lua;" .. package.path
|
||||||
|
|
||||||
@ -308,6 +309,19 @@ end
|
|||||||
|
|
||||||
function test_loop_limit()
|
function test_loop_limit()
|
||||||
local tree = htmlparser.parse([[
|
local tree = htmlparser.parse([[
|
||||||
|
<a id='1>2'>moo</a>
|
||||||
|
<a id='2>3'>moo</a>
|
||||||
|
<b id='foo<bar'>moo</b>
|
||||||
|
<img <%tpl%> foo=bar></img>
|
||||||
|
<img <%tpl%> />
|
||||||
|
<img <%tpl%>></img>
|
||||||
|
<img <%tpl%>/>
|
||||||
|
<i <=moo=>>k</i>
|
||||||
|
<s <-foo->>o</s>
|
||||||
|
<div <*bar*>></div>
|
||||||
|
<p>
|
||||||
|
<a id="unclosed>Element"> with unclosed attribute</a>
|
||||||
|
</p>
|
||||||
<div data-pic="aa<%=image_url%>bb" ></div>
|
<div data-pic="aa<%=image_url%>bb" ></div>
|
||||||
]]) -- issue#42
|
]]) -- issue#42
|
||||||
assert(1==1)
|
assert(1==1)
|
||||||
|
Loading…
Reference in New Issue
Block a user