Mirror of https://github.com/msva/lua-htmlparser.git (synced 2024-11-27 12:44:22 +00:00)
[attribute="value"] working
This commit is contained in:
parent 77f24f93be
commit de746865be
@ -88,14 +88,14 @@ function ElementNode:close(closestart, closeend)
|
||||
end
|
||||
|
||||
local function select(self, s)
|
||||
if not s or type(s) ~= "string" then return Set:new() end
|
||||
local subjects, resultset, childrenonly = Set:new({self})
|
||||
if not s or type(s) ~= "string" or s == "" then return Set:new() end
|
||||
local sets = {
|
||||
[""] = self.deeperelements,
|
||||
["["] = self.deeperattributes,
|
||||
["#"] = self.deeperids,
|
||||
["."] = self.deeperclasses
|
||||
}
|
||||
local subjects, resultset, childrenonly = Set:new({self})
|
||||
for part in string.gmatch(s, "%S+") do
|
||||
if part == ">" then childrenonly = true goto nextpart end
|
||||
resultset = Set:new()
|
||||
@ -107,13 +107,24 @@ local function select(self, s)
|
||||
end
|
||||
if part == "*" then goto nextpart end
|
||||
local excludes, filter = Set:new()
|
||||
for t, w in string.gmatch(part,
|
||||
for t, w, v in string.gmatch(part,
|
||||
"([:%[#.]?)" .. -- t = an optional :, [, #, or .
|
||||
"([^:%(%[#.%]%)]+)" .. -- w = 1 or more of anything not :, (, [, #, ., ], or )
|
||||
"%]?%)?" -- followed by an uncaptured optional ] and/or )
|
||||
) do
|
||||
if t == ":" then filter = w goto nextw end
|
||||
if t == "[" then
|
||||
w, v = string.match(w, "([^=]+)=?(%S*)")
|
||||
end
|
||||
local match = sets[t][w]
|
||||
if v and v ~= "" then
|
||||
v = string.sub(v, 2, #v - 1) -- strip quotes
|
||||
for node in pairs(match) do
|
||||
if node.attributes[w] ~= v then
|
||||
match:remove(node)
|
||||
end
|
||||
end
|
||||
end
|
||||
if filter == "not" then
|
||||
excludes = excludes + match
|
||||
else
|
||||
|
test.lua (25 lines changed)
@ -51,6 +51,13 @@ select("ul > *")
|
||||
select("body [class]")
|
||||
select("body > [class]")
|
||||
|
||||
select(".contacts span:not(.firstname)")
|
||||
select(":not(a)[href]")
|
||||
select("[itemscope]:not([itemprop])")
|
||||
|
||||
select("link[rel='alternate']")
|
||||
select("[test2=\"val='2'\"]")
|
||||
|
||||
print("\nchapters")
|
||||
local sel, chapters = root("ol.chapters > li"), {}
|
||||
for e in pairs(sel) do
|
||||
@ -114,21 +121,3 @@ end
|
||||
for node,table in pairs(scopes) do
|
||||
printscope(node, table)
|
||||
end
|
||||
|
||||
print("\nnot firstname")
|
||||
local sel = root(".contacts span:not(.firstname)")
|
||||
for e in pairs(sel) do
|
||||
print(e.classes[1], e:getcontent())
|
||||
end
|
||||
|
||||
print("\nnot a hrefs")
|
||||
local sel = root(":not(a)[href]")
|
||||
for e in pairs(sel) do
|
||||
print(e.name, e.attributes["href"])
|
||||
end
|
||||
|
||||
print("\ntop itemscopes")
|
||||
local sel = root("[itemscope]:not([itemprop])")
|
||||
for e in pairs(sel) do
|
||||
print(e.name, e.attributes["itemtype"])
|
||||
end
|
Loading…
Reference in New Issue
Block a user