mirror of
https://github.com/msva/lua-htmlparser.git
synced 2024-11-04 23:34:20 +00:00
reworking tpl detection; fixes #50
This commit is contained in:
parent
404de0b9be
commit
2e2f306e7f
@ -1,4 +1,4 @@
|
||||
-- vim: ft=lua ts=2
|
||||
-- vim: ft=lua ts=2 sw=2
|
||||
|
||||
-- Escape every Lua-pattern magic character in `s` by prefixing it with "%",
-- so the result can be embedded in a pattern and matched literally.
-- Returns both results of string.gsub: the escaped string and the number of
-- characters that were escaped.
local esc = function(s)
  local magic_chars = "([%^%$%(%)%%%.%[%]%*%+%-%?])"
  local escaped_capture = "%%" .. "%1"
  return string.gsub(s, magic_chars, escaped_capture)
end
|
||||
local str = tostring
|
||||
@ -11,16 +11,13 @@ local voidelements = require("htmlparser.voidelements")
|
||||
|
||||
local HtmlParser = {}
|
||||
|
||||
local tpl_rep={
|
||||
-- Replacement table for template-engine syntax that can confuse us.
|
||||
local tpr = {
|
||||
-- Here we're replacing confusing sequences
|
||||
-- (things looking like tags, but appearing where tags can't)
|
||||
-- with definitely invalid UTF-8 sequences, and later we'll replace them back
|
||||
["<%"] = char(208,209),
|
||||
["%>"] = char(209,208),
|
||||
["<"] = char(208,209,208,209),
|
||||
[">"] = char(209,208,209,208),
|
||||
}
|
||||
local tpl_rep_rev = {}
|
||||
|
||||
|
||||
local function parse(text,limit)
|
||||
local text=str(text)
|
||||
@ -28,15 +25,38 @@ local function parse(text,limit)
|
||||
local limit = limit or htmlparser_looplimit or 1000
|
||||
|
||||
local tpl = false
|
||||
for k,v in pairs(tpl_rep) do
|
||||
local mtc="("..esc(k)..")"
|
||||
if text:match(mtc) then
|
||||
tpl=true
|
||||
text=text:gsub(mtc,tpl_rep)
|
||||
tpl_rep_rev[v]=k;
|
||||
end
|
||||
|
||||
local function g(id,...)
|
||||
local arg={...}
|
||||
arg[id]=tpr[arg[id]]
|
||||
tpl=true
|
||||
return table.concat(arg)
|
||||
end
|
||||
|
||||
text = text
|
||||
:gsub(
|
||||
"(<)"..
|
||||
"([^>]-)"..
|
||||
"(<)",
|
||||
function(...)return g(3,...)end
|
||||
):gsub(
|
||||
"("..tpr["<"]..")"..
|
||||
"([^%w%s])"..
|
||||
"([^%2]-)"..
|
||||
"(%2)"..
|
||||
"(>)"..
|
||||
"([^>]-)"..
|
||||
"(>)",
|
||||
function(...)return g(5,...)end
|
||||
):gsub(
|
||||
[=[(['"])]=]..
|
||||
[=[([^'>"]-)]=]..
|
||||
"(>)"..
|
||||
[=[([^'>"]-)]=]..
|
||||
[=[(['"])]=],
|
||||
function(...)return g(3,...)end
|
||||
)
|
||||
|
||||
local index = 0
|
||||
local root = ElementNode:new(index, str(text))
|
||||
|
||||
@ -80,7 +100,7 @@ local function parse(text,limit)
|
||||
if not k or k == "/>" or k == ">" then break end
|
||||
|
||||
if eq == "=" then
|
||||
local pattern = "=([^%s>]*)"
|
||||
pattern = "=([^%s>]*)"
|
||||
if quote ~= "" then
|
||||
pattern = quote .. "([^" .. quote .. "]*)" .. quote
|
||||
end
|
||||
@ -90,11 +110,8 @@ local function parse(text,limit)
|
||||
v=v or ""
|
||||
|
||||
if tpl then
|
||||
for rk,rv in pairs(tpl_rep_rev) do
|
||||
local mtc="("..esc(rk)..")"
|
||||
if text:match(mtc) then
|
||||
v = v:gsub(mtc,tpl_rep_rev)
|
||||
end
|
||||
for rk,rv in pairs(tpr) do
|
||||
v = v:gsub(rv,rk)
|
||||
end
|
||||
end
|
||||
|
||||
@ -133,11 +150,8 @@ local function parse(text,limit)
|
||||
end
|
||||
|
||||
if tpl then
|
||||
for k,v in pairs(tpl_rep_rev) do
|
||||
local mtc="("..esc(k)..")"
|
||||
if text:match(mtc) then
|
||||
root._text = root._text:gsub(mtc,tpl_rep_rev)
|
||||
end
|
||||
for k,v in pairs(tpr) do
|
||||
root._text = root._text:gsub(v,k)
|
||||
end
|
||||
end
|
||||
|
||||
|
14
tst/init.lua
14
tst/init.lua
@ -1,3 +1,4 @@
|
||||
-- vim: ft=lua ts=2 sw=2
|
||||
-- Omit next line in actual module clients; it's only to support development of the module itself
|
||||
package.path = "../src/?.lua;" .. package.path
|
||||
|
||||
@ -308,6 +309,19 @@ end
|
||||
|
||||
function test_loop_limit()
|
||||
local tree = htmlparser.parse([[
|
||||
<a id='1>2'>moo</a>
|
||||
<a id='2>3'>moo</a>
|
||||
<b id='foo<bar'>moo</b>
|
||||
<img <%tpl%> foo=bar></img>
|
||||
<img <%tpl%> />
|
||||
<img <%tpl%>></img>
|
||||
<img <%tpl%>/>
|
||||
<i <=moo=>>k</i>
|
||||
<s <-foo->>o</s>
|
||||
<div <*bar*>></div>
|
||||
<p>
|
||||
<a id="unclosed>Element"> with unclosed attribute</a>
|
||||
</p>
|
||||
<div data-pic="aa<%=image_url%>bb" ></div>
|
||||
]]) -- issue#42
|
||||
assert(1==1)
|
||||
|
Loading…
Reference in New Issue
Block a user