improved :not() and dropped method chaining

Fixed :not() so that its exclusions are applied after all other matches,
preventing later selection of elements that shouldn't have been there.
Also ditched the idea of returning a container node, since it was
complex and didn't add much. The functionality could be reintroduced by
having Set implement __call, or maybe even __index, to return the
combined results of all its elements.
This commit is contained in:
Wouter Scherphof 2013-03-24 23:20:03 +01:00
parent 48183bbf04
commit 591e7ebc8b
2 changed files with 43 additions and 54 deletions

View File

@ -17,9 +17,7 @@ function ElementNode:new(nameortext, node, descend, openstart, openend)
deepernodes = Set:new(),
deeperelements = {}, deeperattributes = {}, deeperids = {}, deeperclasses = {}
}
if nameortext == "container" then
instance.root = node
elseif not node then
if not node then
instance.name = "root"
instance.root = instance
instance._text = nameortext
@ -90,58 +88,41 @@ function ElementNode:close(closestart, closeend)
end
-- Resolve the selector string `s` against the subtree of `self`.
--
-- `s` is split on whitespace into parts. Each part narrows a candidate set
-- built from the current subjects (initially just `self`):
--   ">"   the next part starts from each subject's direct children
--         (subject.nodes) instead of all its descendants (subject.deepernodes)
--   "*"   keeps every candidate
--   else  the part is tokenised into (prefix, word) pairs, where the prefix is
--         "" (element name), "[" (attribute), "#" (id), "." (class) or
--         ":" (filter name, e.g. "not"); each word is looked up in the
--         matching self.deeper* index and combined with the candidates.
-- The result of one part becomes the subjects of the next.
--
-- NOTE(review): this span is rendered from a commit diff without +/- markers,
-- so removed and added lines appear together (duplicate guards, duplicate
-- locals, two consecutive returns). Per the commit message the container-node
-- construction and `return ret` look like the removed version and
-- `return resultset` the added one -- confirm against the repository.
-- NOTE(review): Set is defined elsewhere; `+`, `*` and `-` are presumably
-- union, intersection and difference -- confirm.
-- NOTE(review): resultset is only assigned inside the part loop, so an empty
-- selector (or one consisting only of ">") appears to leave it nil -- verify.
local function select(self, s)
-- Guard (one variant per diff side): a missing or non-string selector yields
-- an empty table / an empty Set.
if not s or type(s) ~= "string" then return {} end
local subjects = Set:new({self})
local resultset
local childrenonly
if not s or type(s) ~= "string" then return Set:new() end
local subjects, resultset, childrenonly = Set:new({self})
-- Lookup indexes keyed by the prefix character captured from a selector token.
-- They belong to `self`, not to the individual subjects; combining a lookup
-- with resultset is what limits matches to the subjects' candidates.
local sets = {
[""] = self.deeperelements,
["["] = self.deeperattributes,
["#"] = self.deeperids,
["."] = self.deeperclasses
}
for part in string.gmatch(s, "%S+") do
-- Child combinator: only remember the flag; the next part consumes it.
if part == ">" then childrenonly = true goto nextpart end
resultset = Set:new()
-- Collect the candidate nodes of every current subject.
-- NOTE(review): in both variants below childrenonly is cleared inside this
-- loop, so with several subjects only the first one visited seems to be
-- restricted to its direct children -- confirm whether that is intended.
for subject in pairs(subjects) do
local init = subject.deepernodes
if childrenonly then init = Set:new(subject.nodes) childrenonly = false end
resultset = resultset + init
local star = subject.deepernodes
if childrenonly then star = Set:new(subject.nodes) end
childrenonly = false
resultset = resultset + star
end
-- NOTE(review): this jump skips `subjects = Set:new(resultset)` below, so
-- after a "*" part the subjects are not advanced -- confirm that is intended.
if part == "*" then goto nextpart end
local match, filter
local excludes, filter = Set:new()
-- Each iteration yields an optional prefix out of `:`, `[`, `#`, `.` and the
-- word that follows it; a trailing `]` and/or `)` is consumed but not captured.
for t, w in string.gmatch(part, "([:%[#.]?)([^:%(%[#.%]%)]+)%]?%)?") do
-- TODO tidy up
-- A ":" token only records the filter name; it applies to the next token.
if t == ":" then filter = w goto nextw end
if t == "" then match = self.deeperelements[w]
elseif t == "[" then match = self.deeperattributes[w]
elseif t == "#" then match = self.deeperids[w]
elseif t == "." then match = self.deeperclasses[w]
end
local match = sets[t][w]
if filter == "not" then
-- One variant subtracts immediately, the other defers the subtraction by
-- accumulating into excludes (applied after this token loop).
resultset = resultset - match
excludes = excludes + match
else
resultset = resultset * match
end
-- The filter is consumed by a single token.
filter = nil
::nextw::
end
-- Deferred :not() exclusions are removed once all tokens of the part ran.
resultset = resultset - excludes
-- The matches of this part are the subjects for the next part.
subjects = Set:new(resultset)
::nextpart::
end
-- construct a container node for the resultset, so that we can :select() on it
local ret = ElementNode:new("container", self)
for node in pairs(resultset) do
table.insert(ret.nodes, node)
ret.deepernodes = ret.deepernodes + node.deepernodes
-- Merge each selected node's per-key index Sets into the container's indexes.
for listname,list in pairs({
deeperelements = node.deeperelements,
deeperattributes = node.deeperattributes,
deeperids = node.deeperids,
deeperclasses = node.deeperclasses
}) do
local target = ret[listname]
for k,set in pairs(list) do
-- Set.__add will create an empty Set if not target[k]
target[k] = target[k] + set
end
end
end
-- Two returns, one per diff side: the container node / the plain result Set.
return ret
return resultset
end
--- Method form of the selector lookup.
-- Forwards to the file-local select() with this node as the subject, so
-- callers can write node:select(s).
-- @param s selector string
-- @return whatever the file-local select() returns for (self, s)
function ElementNode:select(s)
  return select(self, s)
end

View File

@ -26,11 +26,11 @@ p(root)
-- Test helper: prints a blank line and the selector `s`, runs root:select(s),
-- then prints the name of each selected element followed by a count.
-- `root` is defined outside this span.
-- NOTE(review): this span is rendered from a commit diff without +/- markers,
-- so removed and added lines appear together and the blocks do not balance.
-- The `tags` / ipairs(tags.nodes) / #tags.nodes lines look like the removed
-- container-node version, and the `sel` / pairs(sel) / sel:len() lines like
-- the added Set-based version -- confirm against the repository.
local function select( s )
print ""
print("->", s)
local tags = root:select(s)
for i,t in ipairs(tags.nodes) do
print(t.name)
local sel = root:select(s)
for element in pairs(sel) do
print(element.name)
end
print(# tags.nodes)
-- sel:len() is presumably the number of elements in the Set -- confirm.
print(sel:len())
end
select("*")
select("link")
@ -53,8 +53,8 @@ select("body > [class]")
print("\nchapters")
local sel, chapters = root("ol.chapters > li"), {}
for _,v in ipairs(sel.nodes) do
table.insert(chapters, v:getcontent())
for e in pairs(sel) do
table.insert(chapters, e:getcontent())
end
-- print
for i,v in ipairs(chapters) do
@ -62,11 +62,11 @@ for i,v in ipairs(chapters) do
end
print("\ncontacts")
local sel, contacts = root("ul.contacts > li")("span[class]"), {}
for _,v in ipairs(sel.nodes) do
local id = v.parent.parent.id -- li > a > span
local sel, contacts = root("ul.contacts span[class]"), {}
for e in pairs(sel) do
local id = e.parent.parent.id -- li > a > span
contacts[id] = contacts[id] or {}
contacts[id][v.classes[1]] = v:getcontent()
contacts[id][e.classes[1]] = e:getcontent()
end
-- print
for k,v in pairs(contacts) do
@ -78,7 +78,7 @@ end
print("\nmicrodata")
local sel, scopes = root("[itemprop]"), {}
for _,prop in ipairs(sel.nodes) do
for prop in pairs(sel) do
if prop.attributes["itemscope"] then goto nextprop end
local descendantscopes, scope = {}, prop
while true do
@ -115,12 +115,20 @@ for node,table in pairs(scopes) do
printscope(node, table)
end
local sel = root("[itemscope]:not([itemprop])")
for i,v in ipairs(sel.nodes) do
print(v.name)
print("\nnot firstname")
local sel = root(".contacts span:not(.firstname)")
for e in pairs(sel) do
print(e.classes[1], e:getcontent())
end
local sel = root("[href]:not(a)")
for i,v in ipairs(sel.nodes) do
print(v.name)
print("\nnot a hrefs")
local sel = root(":not(a)[href]")
for e in pairs(sel) do
print(e.name, e.attributes["href"])
end
print("\ntop itemscopes")
local sel = root("[itemscope]:not([itemprop])")
for e in pairs(sel) do
print(e.name, e.attributes["itemtype"])
end