first draft

Initial working version in version control
This commit is contained in:
Wouter Scherphof
2013-03-19 11:37:08 +01:00
parent 65aff05b29
commit 76000166e0
7 changed files with 378 additions and 0 deletions

57
HtmlParser.lua Normal file
View File

@@ -0,0 +1,57 @@
local ElementNode = require("ElementNode")
local voidelements = require("voidelements")
-- Module table; `parse` is attached to it at the bottom of the file and the
-- table itself is what the file returns.
local HtmlParser = {}
--- Parse an HTML string into a tree of ElementNode objects.
--
-- The text is scanned left to right with Lua patterns. Each opening tag
-- becomes a new ElementNode, its attributes are read from the text returned
-- by `tag:gettext()`, and any closing tags that directly follow are matched
-- against the most recently opened, still unclosed element of the same name.
-- Closing tags that match no open element are ignored.
--
-- @param text the HTML source; handed unchanged to `ElementNode:new`
-- @return the root node created by `ElementNode:new(text)`
local function parse(text)
  local root = ElementNode:new(text)

  -- node:     the element created most recently (the root before any tag)
  -- descend:  flag handed to ElementNode:new for the next tag; presumably
  --           "attach as child of node" when true, "as sibling" when false
  --           (ElementNode is defined elsewhere -- confirm there)
  -- tpos:     end index of the last opening tag found in the text
  -- opentags: tag name -> stack (array) of elements awaiting a closing tag
  local node, descend, tpos, opentags = root, true, 1, {}

  while true do
    -- Next opening tag: "<", a word (the name), then everything up to ">".
    local openstart, name
    openstart, tpos, name = string.find(root._text, "<(%w+)[^>]*>", tpos)
    if not name then break end

    local tag = ElementNode:new(name, node, descend, openstart, tpos)
    node = tag

    -- Attributes: key=value pairs, value bare or wrapped in ' or ".
    local tagst, apos = tag:gettext(), 1
    while true do
      local _, k, quote, v
      _, apos, k, quote = string.find(tagst, "%s+([^%s=]+)=(['\"]?)", apos)
      if not k then break end

      -- apos now sits on the opening quote (or on "=" for a bare value), so
      -- the value pattern starts with that same character.
      local pattern = "=([^%s'\">]*)"
      if quote ~= '' then
        pattern = quote .. "([^" .. quote .. "]*)" .. quote
      end
      _, apos, v = string.find(tagst, pattern, apos)

      -- No match means the quoted value is not terminated inside this tag's
      -- text. Stop here: continuing with apos == nil would restart the scan
      -- at index 1 and loop forever on the same attribute.
      if not apos then break end

      tag:addattribute(k, v)
    end

    if voidelements[string.lower(tag.name)] then
      -- Void elements never get a closing tag; close immediately.
      descend = false
      tag:close()
    else
      -- A container was just opened, so the next tag belongs inside it.
      -- Without this reset a `descend = false` left over from an earlier
      -- void element would still be in effect.
      descend = true
      -- Keep a stack per name so nested elements of the same name
      -- (e.g. <div><div></div></div>) are each closed by their own tag.
      local stack = opentags[tag.name]
      if not stack then
        stack = {}
        opentags[tag.name] = stack
      end
      stack[#stack + 1] = tag
    end

    -- Consume every closing tag that follows before the next opening tag.
    local closeend = tpos
    while true do
      local closestart, closing, closename
      closestart, closeend, closing, closename =
        string.find(root._text, "[^<]*<(/?)(%w+)", closeend)
      -- Stop at end of text (no match) or at an opening tag (no "/").
      if not closing or closing == '' then break end

      local stack = opentags[closename]
      local closed = stack and table.remove(stack)
      -- A closing tag with no matching open element is skipped instead of
      -- raising "attempt to index a nil value".
      if closed then
        -- The match may begin in the text before the tag; move to its "<".
        closestart = string.find(root._text, "<", closestart)
        closed:close(closestart, closeend + 1)
        node = closed.parent
        descend = true
      end
    end
  end

  return root
end
-- Expose the local `parse` function as the module's `parse` field and return
-- the module table.
HtmlParser.parse = parse
return HtmlParser