mirror of
https://github.com/msva/lua-htmlparser.git
synced 2024-11-27 12:44:22 +00:00
[attribute="value"] working
This commit is contained in:
parent
77f24f93be
commit
de746865be
@ -88,14 +88,14 @@ function ElementNode:close(closestart, closeend)
|
|||||||
end
|
end
|
||||||
|
|
||||||
local function select(self, s)
|
local function select(self, s)
|
||||||
if not s or type(s) ~= "string" then return Set:new() end
|
if not s or type(s) ~= "string" or s == "" then return Set:new() end
|
||||||
local subjects, resultset, childrenonly = Set:new({self})
|
|
||||||
local sets = {
|
local sets = {
|
||||||
[""] = self.deeperelements,
|
[""] = self.deeperelements,
|
||||||
["["] = self.deeperattributes,
|
["["] = self.deeperattributes,
|
||||||
["#"] = self.deeperids,
|
["#"] = self.deeperids,
|
||||||
["."] = self.deeperclasses
|
["."] = self.deeperclasses
|
||||||
}
|
}
|
||||||
|
local subjects, resultset, childrenonly = Set:new({self})
|
||||||
for part in string.gmatch(s, "%S+") do
|
for part in string.gmatch(s, "%S+") do
|
||||||
if part == ">" then childrenonly = true goto nextpart end
|
if part == ">" then childrenonly = true goto nextpart end
|
||||||
resultset = Set:new()
|
resultset = Set:new()
|
||||||
@ -107,13 +107,24 @@ local function select(self, s)
|
|||||||
end
|
end
|
||||||
if part == "*" then goto nextpart end
|
if part == "*" then goto nextpart end
|
||||||
local excludes, filter = Set:new()
|
local excludes, filter = Set:new()
|
||||||
for t, w in string.gmatch(part,
|
for t, w, v in string.gmatch(part,
|
||||||
"([:%[#.]?)" .. -- t = an optional :, [, #, or .
|
"([:%[#.]?)" .. -- t = an optional :, [, #, or .
|
||||||
"([^:%(%[#.%]%)]+)" .. -- w = 1 or more of anything not :, (, [, #, ., ], or )
|
"([^:%(%[#.%]%)]+)" .. -- w = 1 or more of anything not :, (, [, #, ., ], or )
|
||||||
"%]?%)?" -- followed by an uncaptured optional ] and/or )
|
"%]?%)?" -- followed by an uncaptured optional ] and/or )
|
||||||
) do
|
) do
|
||||||
if t == ":" then filter = w goto nextw end
|
if t == ":" then filter = w goto nextw end
|
||||||
|
if t == "[" then
|
||||||
|
w, v = string.match(w, "([^=]+)=?(%S*)")
|
||||||
|
end
|
||||||
local match = sets[t][w]
|
local match = sets[t][w]
|
||||||
|
if v and v ~= "" then
|
||||||
|
v = string.sub(v, 2, #v - 1) -- strip quotes
|
||||||
|
for node in pairs(match) do
|
||||||
|
if node.attributes[w] ~= v then
|
||||||
|
match:remove(node)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
if filter == "not" then
|
if filter == "not" then
|
||||||
excludes = excludes + match
|
excludes = excludes + match
|
||||||
else
|
else
|
||||||
|
25
test.lua
25
test.lua
@ -51,6 +51,13 @@ select("ul > *")
|
|||||||
select("body [class]")
|
select("body [class]")
|
||||||
select("body > [class]")
|
select("body > [class]")
|
||||||
|
|
||||||
|
select(".contacts span:not(.firstname)")
|
||||||
|
select(":not(a)[href]")
|
||||||
|
select("[itemscope]:not([itemprop])")
|
||||||
|
|
||||||
|
select("link[rel='alternate']")
|
||||||
|
select("[test2=\"val='2'\"]")
|
||||||
|
|
||||||
print("\nchapters")
|
print("\nchapters")
|
||||||
local sel, chapters = root("ol.chapters > li"), {}
|
local sel, chapters = root("ol.chapters > li"), {}
|
||||||
for e in pairs(sel) do
|
for e in pairs(sel) do
|
||||||
@ -114,21 +121,3 @@ end
|
|||||||
for node,table in pairs(scopes) do
|
for node,table in pairs(scopes) do
|
||||||
printscope(node, table)
|
printscope(node, table)
|
||||||
end
|
end
|
||||||
|
|
||||||
print("\nnot firstname")
|
|
||||||
local sel = root(".contacts span:not(.firstname)")
|
|
||||||
for e in pairs(sel) do
|
|
||||||
print(e.classes[1], e:getcontent())
|
|
||||||
end
|
|
||||||
|
|
||||||
print("\nnot a hrefs")
|
|
||||||
local sel = root(":not(a)[href]")
|
|
||||||
for e in pairs(sel) do
|
|
||||||
print(e.name, e.attributes["href"])
|
|
||||||
end
|
|
||||||
|
|
||||||
print("\ntop itemscopes")
|
|
||||||
local sel = root("[itemscope]:not([itemprop])")
|
|
||||||
for e in pairs(sel) do
|
|
||||||
print(e.name, e.attributes["itemtype"])
|
|
||||||
end
|
|
Loading…
Reference in New Issue
Block a user