reworking tpl detection; fixes #50

This commit is contained in:
Vadim A. Misbakh-Soloviov 2017-07-22 19:00:38 +07:00
parent 404de0b9be
commit 2e2f306e7f
No known key found for this signature in database
GPG Key ID: 26503D349B3B334B
2 changed files with 53 additions and 25 deletions

View File

@ -1,4 +1,4 @@
-- vim: ft=lua ts=2
-- vim: ft=lua ts=2 sw=2
-- Escape Lua-pattern magic characters in `s` by prefixing each one with "%",
-- so the result can be embedded in a pattern and matched literally.
-- Returns exactly what string.gsub returns: the escaped string followed by
-- the number of substitutions made.
local function esc(s)
  local magic_chars = "([%^%$%(%)%%%.%[%]%*%+%-%?])"
  local escaped_capture = "%%" .. "%1"
  return string.gsub(s, magic_chars, escaped_capture)
end
local str = tostring
@ -11,16 +11,13 @@ local voidelements = require("htmlparser.voidelements")
local HtmlParser = {}
local tpl_rep={
-- Replacement table for template-engine syntax that can confuse the parser.
local tpr = {
-- Here we replace confusing sequences
-- (things that look like tags but appear where tags can't)
-- with definitely invalid UTF-8 byte sequences; later we replace them back.
["<%"] = char(208,209),
["%>"] = char(209,208),
["<"] = char(208,209,208,209),
[">"] = char(209,208,209,208),
}
local tpl_rep_rev = {}
local function parse(text,limit)
local text=str(text)
@ -28,15 +25,38 @@ local function parse(text,limit)
local limit = limit or htmlparser_looplimit or 1000
local tpl = false
for k,v in pairs(tpl_rep) do
local mtc="("..esc(k)..")"
if text:match(mtc) then
local function g(id,...)
local arg={...}
arg[id]=tpr[arg[id]]
tpl=true
text=text:gsub(mtc,tpl_rep)
tpl_rep_rev[v]=k;
end
return table.concat(arg)
end
text = text
:gsub(
"(<)"..
"([^>]-)"..
"(<)",
function(...)return g(3,...)end
):gsub(
"("..tpr["<"]..")"..
"([^%w%s])"..
"([^%2]-)"..
"(%2)"..
"(>)"..
"([^>]-)"..
"(>)",
function(...)return g(5,...)end
):gsub(
[=[(['"])]=]..
[=[([^'>"]-)]=]..
"(>)"..
[=[([^'>"]-)]=]..
[=[(['"])]=],
function(...)return g(3,...)end
)
local index = 0
local root = ElementNode:new(index, str(text))
@ -80,7 +100,7 @@ local function parse(text,limit)
if not k or k == "/>" or k == ">" then break end
if eq == "=" then
local pattern = "=([^%s>]*)"
pattern = "=([^%s>]*)"
if quote ~= "" then
pattern = quote .. "([^" .. quote .. "]*)" .. quote
end
@ -90,11 +110,8 @@ local function parse(text,limit)
v=v or ""
if tpl then
for rk,rv in pairs(tpl_rep_rev) do
local mtc="("..esc(rk)..")"
if text:match(mtc) then
v = v:gsub(mtc,tpl_rep_rev)
end
for rk,rv in pairs(tpr) do
v = v:gsub(rv,rk)
end
end
@ -133,11 +150,8 @@ local function parse(text,limit)
end
if tpl then
for k,v in pairs(tpl_rep_rev) do
local mtc="("..esc(k)..")"
if text:match(mtc) then
root._text = root._text:gsub(mtc,tpl_rep_rev)
end
for k,v in pairs(tpr) do
root._text = root._text:gsub(v,k)
end
end

View File

@ -1,3 +1,4 @@
-- vim: ft=lua ts=2 sw=2
-- Omit next line in actual module clients; it's only to support development of the module itself
package.path = "../src/?.lua;" .. package.path
@ -308,6 +309,19 @@ end
function test_loop_limit()
local tree = htmlparser.parse([[
<a id='1>2'>moo</a>
<a id='2>3'>moo</a>
<b id='foo<bar'>moo</b>
<img <%tpl%> foo=bar></img>
<img <%tpl%> />
<img <%tpl%>></img>
<img <%tpl%>/>
<i <=moo=>>k</i>
<s <-foo->>o</s>
<div <*bar*>></div>
<p>
<a id="unclosed>Element"> with unclosed attribute</a>
</p>
<div data-pic="aa<%=image_url%>bb" ></div>
]]) -- issue#42
assert(1==1)