2013-03-19 10:37:08 +00:00
|
|
|
local ElementNode = require("ElementNode")
|
|
|
|
local voidelements = require("voidelements")
|
|
|
|
|
|
|
|
-- Module table: public functions are attached to it as fields and the table
-- itself is returned at the end of the file.
local HtmlParser = {}
|
|
|
|
|
|
|
|
--- Reads the attributes of an opening tag and stores them on its node.
-- The tag's own text is obtained with tag:gettext(); every attribute found is
-- reported through tag:addattribute(name, value). Attributes written without
-- a value (e.g. `disabled`) and attributes whose value cannot be matched
-- (e.g. an unterminated quote) are stored with the empty string.
-- @param tag node returned by ElementNode:new for the opening tag
local function parseattributes(tag)
  local tagst, apos = tag:gettext(), 1
  while true do
    -- "/" and ">" are excluded from the name so that the tail of "<br />"
    -- or "<p >" is not mistaken for an attribute.
    local _, nameend, k, eq, quote =
      string.find(tagst, "%s+([^%s=/>]+)(=?)(['\"]?)", apos)
    if not k then break end
    -- The match is at least two characters long, so the cursor always
    -- moves forward and the loop is guaranteed to terminate.
    apos = nameend

    local v = ""
    if eq ~= "" then
      -- Unquoted value: everything after "=" up to whitespace, quote or ">".
      local pattern = "=([^%s'\">]*)"
      if quote ~= "" then
        -- Quoted value: everything up to the matching quote character.
        pattern = quote .. "([^" .. quote .. "]*)" .. quote
      end
      local _, valueend, value = string.find(tagst, pattern, apos)
      -- Only advance when the value was really found; overwriting the
      -- cursor with nil would restart the scan at position 1 and loop
      -- forever on the same attribute.
      if value then
        apos, v = valueend, value
      end
    end

    tag:addattribute(k, v)
  end
end

--- Parses HTML text into a tree of ElementNode objects.
-- The root node is created from the text; the source is then scanned through
-- root._text for opening tags. Each opening tag becomes a new node created
-- with the current node as its parent argument, gets its attributes parsed,
-- and is either closed at once (void elements, looked up by lower-cased name
-- in `voidelements`) or remembered in a per-name stack until a closing tag
-- with the same name is seen. Closing tags with no remembered opening tag
-- are ignored.
-- @param text HTML source; handed unchanged to ElementNode:new
-- @return the root node
local function parse(text)
  local root = ElementNode:new(text)

  -- node:     most recently opened node (or the parent of the last closed one)
  -- descend:  passed to ElementNode:new; false right after a void element,
  --           true again after a closing tag
  -- tpos:     end position of the last opening tag found
  -- opentags: tag name -> stack of nodes still waiting for a closing tag
  local node, descend, tpos, opentags = root, true, 1, {}

  while true do
    local openstart, name
    openstart, tpos, name = string.find(root._text, "<(%w+)[^>]*>", tpos)
    if not name then break end

    local tag = ElementNode:new(name, node, descend, openstart, tpos)
    node = tag
    parseattributes(tag)

    if voidelements[string.lower(tag.name)] then
      descend = false
      tag:close()
    else
      opentags[tag.name] = opentags[tag.name] or {}
      table.insert(opentags[tag.name], tag)
    end

    -- Consume every closing tag that follows before the next opening tag.
    local closeend = tpos
    while true do
      local closestart, closing, closename
      closestart, closeend, closing, closename =
        string.find(root._text, "[^<]*<(/?)(%w+)", closeend)
      if not closing or closing == "" then break end

      -- A closing tag without a recorded opener (stray "</div>", "</br>",
      -- different letter case) used to raise an error here; skip it.
      local pending = opentags[closename]
      local closed = pending and table.remove(pending)
      if closed then
        -- The match may begin with leading text; move to the "<" itself.
        closestart = string.find(root._text, "<", closestart)
        -- closeend is the last character of the name, so +1 is the
        -- position right after it (the ">" of a plain closing tag).
        closed:close(closestart, closeend + 1)
        node = closed.parent
        descend = true
      end
    end
  end

  return root
end
|
|
|
|
-- Publish the file-local parse function as HtmlParser.parse.
HtmlParser.parse = parse
|
|
|
|
|
|
|
|
-- Return the module table as the result of this chunk.
return HtmlParser
|
|
|
|
|