lua-htmlparser/test.lua
Wouter Scherphof 206f7af3c4 closes #6
:not(), [att=val], [att!=val], [att|=val], [att*=val], [att~=val],
[att^=val], [att$=val]
Note that the selection is now returned as a simple Set, breaking the
ability brought in by #8 and #9 to :select() or () on the selection. Of
course, the elements in the returned Set are still ElementNodes that
can be selected upon.
2013-03-26 09:57:00 +01:00

137 lines
3.2 KiB
Lua

local htmlparser = require("htmlparser")
local io = require("io")
-- Read the whole HTML fixture through its own file handle. io.input()
-- would replace the process-wide default input stream, and closing the
-- returned handle would then leave that default stream closed.
-- assert() raises with io.open's message when the file cannot be opened.
local file = assert(io.open("./test.html"))
local text = file:read("*a")
file:close()
-- Parse the fixture; `root` is the node the rest of this script queries.
local root = htmlparser.parse(text)
-- print the tree
--- Print a node and then, recursively, each of its child nodes.
-- The printed line is the node's name preceded by `level` spaces and
-- followed by every attribute rendered as ` key=[[value]]` (attributes
-- are visited with pairs, so their order is unspecified).
-- @param n node table with `level`, `name`, `attributes` and `nodes` fields
local function p(n)
  local pieces = { string.rep(" ", n.level) .. n.name }
  for attr, value in pairs(n.attributes) do
    pieces[#pieces + 1] = " " .. attr .. "=[[" .. value .. "]]"
  end
  print(table.concat(pieces))
  for _, child in ipairs(n.nodes) do
    p(child)
  end
end
-- Dump the parsed document tree, starting from the root node.
p(root)
--- Run one selector against `root` and print what it matched.
-- Output: an empty line, then "->" together with the selector, then the
-- name of every element in the selection, then the result of the
-- selection's `len()` method.
-- NOTE(review): this local hides Lua's built-in `select` for the rest of
-- the file; nothing visible here relies on the built-in.
-- @param s selector string handed to `root:select`
local function select( s )
  print("")
  print("->", s)
  local matches = root:select(s)
  for matched in pairs(matches) do
    print(matched.name)
  end
  print(matches:len())
end
-- Every selector exercised by this script, in the order it is run.
-- "#/contacts/4711" is listed twice on purpose, so the output stays the same.
local selectors = {
  "*",
  "link",
  "#/contacts/4711",
  ".chapters",
  "[href]",
  "span.firstname",
  "ul[id]",
  "#/contacts/4711",
  "#/contacts/4711 *",
  "#/contacts/4711 .lastname",
  "body li[id]",
  "ul",
  "ul *",
  "ul > *",
  "body [class]",
  "body > [class]",
  ".contacts span:not(.firstname)",
  ":not(a)[href]",
  "[itemscope]:not([itemprop])",
  "link[rel='alternate']",
  "[test2=\"val='2'\"]",
  "[test5='val5']",
  "[test6='val\"\"6']",
  "[itemscope='']",
  "[itemscope=]",
  "[itemscope]",
  "[itemscope][itemprop='address']",
  "[itemscope][itemprop!='address']",
  "[itemscope][itemprop!='adres']",
  "[itemscope][itemprop!='']",
  "[hreflang|='en']",
  "[itemprop*='address']",
  "[words~='two']",
  "[words~='three']",
  "[itemprop$='ion']",
  "[hreflang^='en']",
}
for _, selector in ipairs(selectors) do
  select(selector)
end
print("\nchapters")
-- Gather the content of every element selected by "ol.chapters > li".
-- The selection is walked with pairs, so the collection order is unspecified.
local chapters = {}
for item in pairs(root("ol.chapters > li")) do
  table.insert(chapters, item:getcontent())
end
-- Print each collected entry next to its position in the list.
for index = 1, #chapters do
  print(index, chapters[index])
end
print("\ncontacts")
-- Group the content of each selected span by the id of its grandparent
-- element; within a group the key is the span's first class.
local contacts = {}
for span in pairs(root("ul.contacts span[class]")) do
  local owner_id = span.parent.parent.id -- li > a > span
  local fields = contacts[owner_id]
  if not fields then
    fields = {}
    contacts[owner_id] = fields
  end
  fields[span.classes[1]] = span:getcontent()
end
-- Print every group id followed by its class/content pairs.
for contact_id, fields in pairs(contacts) do
  print(contact_id)
  for class_name, content in pairs(fields) do
    print(class_name, content)
  end
end
print("\nmicrodata")
-- Build `scopes`: a tree keyed by scope nodes. The outermost keys are
-- ancestors that carry `itemscope` but no `itemprop`; ancestors carrying
-- both become nested tables below them, and each plain property is stored
-- as itemprop-name -> content in the table of its innermost scope.
local scopes = {}
for prop in pairs(root("[itemprop]")) do
  -- Elements that carry `itemscope` themselves are not stored as
  -- properties; they only appear as keys via the ancestor walk below.
  if not prop.attributes["itemscope"] then
    -- Climb the ancestors. Every ancestor with both `itemscope` and
    -- `itemprop` is recorded (outermost first); the climb ends at the
    -- first ancestor that has `itemscope` without `itemprop`.
    local nested, scope = {}, prop
    local reached_top
    repeat
      scope = scope.parent
      local attrs = scope.attributes
      if attrs["itemscope"] then
        if attrs["itemprop"] then
          table.insert(nested, 1, scope)
        else
          reached_top = true
        end
      end
    until reached_top

    -- Descend from the outermost scope's table through the nested scopes,
    -- creating the intermediate tables on first use.
    local entry = scopes[scope]
    if not entry then
      entry = {}
      scopes[scope] = entry
    end
    for _, inner in ipairs(nested) do
      local child = entry[inner]
      if not child then
        child = {}
        entry[inner] = child
      end
      entry = child
    end

    local name, content = prop.attributes["itemprop"], prop:getcontent()
    entry[name] = content
  end
end
-- print
--- Print one microdata scope and everything stored beneath it.
-- The first printed line is the scope's `itemtype` attribute, indented by
-- `level - 1` spaces, with the scope's `itemprop` attribute as a second
-- print argument. Either attribute is shown as "" when absent.
-- Entries of `table` whose key is itself a table are treated as nested
-- scopes and printed recursively one level deeper; every other entry is
-- printed as `key=[value]`, indented by `level` spaces.
-- @param node  scope node; only `node.attributes` is read
-- @param table entries of the scope (nested scope node -> table, or
--              property name -> content). The name hides the `table`
--              library inside this function, which does not use it.
-- @param level optional nesting depth, defaults to 1
local function printscope(node, table, level)
  level = level or 1
  local attributes = node.attributes
  -- `itemtype` is optional on an item scope, so fall back to "" (as is
  -- done for `itemprop`) rather than fail on a nil concatenation.
  local scopetype = attributes["itemtype"] or ""
  local scopeprop = attributes["itemprop"] or ""
  print(string.rep(" ", level - 1) .. scopetype, scopeprop)
  for prop, v in pairs(table) do
    if type(prop) == "table" then
      printscope(prop, v, level + 1)
    else
      print(string.rep(" ", level) .. prop .. "=[" .. v .. "]")
    end
  end
end
-- Print every entry of `scopes` together with its nested content.
for scope_node, entries in pairs(scopes) do
  printscope(scope_node, entries)
end