From 591e7ebc8b6a8c728eb6552cb7f510ef844439bf Mon Sep 17 00:00:00 2001 From: Wouter Scherphof Date: Sun, 24 Mar 2013 23:20:03 +0100 Subject: [PATCH] improved :not() and dropped method chaining Fixed :not() so that it filters only after all matches have been collected, preventing later selection of elements that shouldn't have been there. Also ditched the idea of returning a container node, since it was complex and didn't add much. The functionality could be reintroduced by having Set implement __call, or maybe even __index, to return the combined results of all its elements. --- ElementNode.lua | 55 ++++++++++++++++--------------------------------- test.lua | 42 ++++++++++++++++++++++--------------- 2 files changed, 43 insertions(+), 54 deletions(-) diff --git a/ElementNode.lua b/ElementNode.lua index fa08ad6..73ebb56 100644 --- a/ElementNode.lua +++ b/ElementNode.lua @@ -17,9 +17,7 @@ function ElementNode:new(nameortext, node, descend, openstart, openend) deepernodes = Set:new(), deeperelements = {}, deeperattributes = {}, deeperids = {}, deeperclasses = {} } - if nameortext == "container" then - instance.root = node - elseif not node then + if not node then instance.name = "root" instance.root = instance instance._text = nameortext @@ -90,58 +88,41 @@ function ElementNode:close(closestart, closeend) end local function select(self, s) - if not s or type(s) ~= "string" then return {} end - local subjects = Set:new({self}) - local resultset - local childrenonly + if not s or type(s) ~= "string" then return Set:new() end + local subjects, resultset, childrenonly = Set:new({self}) + local sets = { + [""] = self.deeperelements, + ["["] = self.deeperattributes, + ["#"] = self.deeperids, + ["."] = self.deeperclasses + } for part in string.gmatch(s, "%S+") do if part == ">" then childrenonly = true goto nextpart end resultset = Set:new() for subject in pairs(subjects) do - local init = subject.deepernodes - if childrenonly then init = Set:new(subject.nodes) childrenonly = false end - resultset = 
resultset + init + local star = subject.deepernodes + if childrenonly then star = Set:new(subject.nodes) end + childrenonly = false + resultset = resultset + star end if part == "*" then goto nextpart end - local match, filter + local excludes, filter = Set:new() for t, w in string.gmatch(part, "([:%[#.]?)([^:%(%[#.%]%)]+)%]?%)?") do - -- TODO tidy up if t == ":" then filter = w goto nextw end - if t == "" then match = self.deeperelements[w] - elseif t == "[" then match = self.deeperattributes[w] - elseif t == "#" then match = self.deeperids[w] - elseif t == "." then match = self.deeperclasses[w] - end + local match = sets[t][w] if filter == "not" then - resultset = resultset - match + excludes = excludes + match else resultset = resultset * match end filter = nil ::nextw:: end + resultset = resultset - excludes subjects = Set:new(resultset) ::nextpart:: end - -- construct a container node for the resultset, so that we can :select() on it - local ret = ElementNode:new("container", self) - for node in pairs(resultset) do - table.insert(ret.nodes, node) - ret.deepernodes = ret.deepernodes + node.deepernodes - for listname,list in pairs({ - deeperelements = node.deeperelements, - deeperattributes = node.deeperattributes, - deeperids = node.deeperids, - deeperclasses = node.deeperclasses - }) do - local target = ret[listname] - for k,set in pairs(list) do - -- Set.__add will create an empty Set if not target[k] - target[k] = target[k] + set - end - end - end - return ret + return resultset end function ElementNode:select(s) return select(self, s) end diff --git a/test.lua b/test.lua index 318b34a..5417bd1 100644 --- a/test.lua +++ b/test.lua @@ -26,11 +26,11 @@ p(root) local function select( s ) print "" print("->", s) - local tags = root:select(s) - for i,t in ipairs(tags.nodes) do - print(t.name) + local sel = root:select(s) + for element in pairs(sel) do + print(element.name) end - print(# tags.nodes) + print(sel:len()) end select("*") select("link") @@ -53,8 +53,8 
@@ select("body > [class]") print("\nchapters") local sel, chapters = root("ol.chapters > li"), {} -for _,v in ipairs(sel.nodes) do - table.insert(chapters, v:getcontent()) +for e in pairs(sel) do + table.insert(chapters, e:getcontent()) end -- print for i,v in ipairs(chapters) do @@ -62,11 +62,11 @@ for i,v in ipairs(chapters) do end print("\ncontacts") -local sel, contacts = root("ul.contacts > li")("span[class]"), {} -for _,v in ipairs(sel.nodes) do - local id = v.parent.parent.id -- li > a > span +local sel, contacts = root("ul.contacts span[class]"), {} +for e in pairs(sel) do + local id = e.parent.parent.id -- li > a > span contacts[id] = contacts[id] or {} - contacts[id][v.classes[1]] = v:getcontent() + contacts[id][e.classes[1]] = e:getcontent() end -- print for k,v in pairs(contacts) do @@ -78,7 +78,7 @@ end print("\nmicrodata") local sel, scopes = root("[itemprop]"), {} -for _,prop in ipairs(sel.nodes) do +for prop in pairs(sel) do if prop.attributes["itemscope"] then goto nextprop end local descendantscopes, scope = {}, prop while true do @@ -115,12 +115,20 @@ for node,table in pairs(scopes) do printscope(node, table) end -local sel = root("[itemscope]:not([itemprop])") -for i,v in ipairs(sel.nodes) do - print(v.name) +print("\nnot firstname") +local sel = root(".contacts span:not(.firstname)") +for e in pairs(sel) do + print(e.classes[1], e:getcontent()) end -local sel = root("[href]:not(a)") -for i,v in ipairs(sel.nodes) do - print(v.name) +print("\nnot a hrefs") +local sel = root(":not(a)[href]") +for e in pairs(sel) do + print(e.name, e.attributes["href"]) +end + +print("\ntop itemscopes") +local sel = root("[itemscope]:not([itemprop])") +for e in pairs(sel) do + print(e.name, e.attributes["itemtype"]) end \ No newline at end of file