require("luarocks.loader")
local Set = require("Set")

-- ElementNode: one node of a parsed text tree. Each node records where its
-- opening and closing parts sit in the root's text, its attributes, and
-- per-branch lookup tables (by element name, attribute name, id and class)
-- that the selector engine below uses.
local ElementNode = {}
ElementNode.mt = {__index = ElementNode}

-- Create a node.
-- nameortext: the element name; when `node` is absent this is the complete
--             text, and the new node becomes the root (named "root").
-- node:       an existing node used as the point of reference, or nil for root.
-- descend:    truthy -> the new node is appended to `node.nodes` (a child);
--             falsy  -> it is appended to `node.parent.nodes` (a sibling).
-- openstart, openend: positions in the root text of the opening part; the
--             closing positions start out equal to them until close() is
--             called with explicit values.
-- Returns the new node (metatable ElementNode.mt).
function ElementNode:new(nameortext, node, descend, openstart, openend)
  local instance = {
    name = nameortext,
    level = 0,
    parent = nil,
    root = nil,
    nodes = {},
    _openstart = openstart, _openend = openend,
    _closestart = openstart, _closeend = openend,
    attributes = {},
    id = nil,
    classes = {},
    deepernodes = Set:new(),
    deeperelements = {}, deeperattributes = {},
    deeperids = {}, deeperclasses = {}
  }
  if not node then
    -- root node: owns the text and spans all of it
    instance.name = "root"
    instance.root = instance
    instance._text = nameortext
    local length = string.len(nameortext)
    instance._openstart, instance._openend = 1, length
    instance._closestart, instance._closeend = 1, length
  elseif descend then
    -- child of `node`
    instance.root = node.root
    instance.parent = node
    instance.level = node.level + 1
    table.insert(node.nodes, instance)
  else
    -- sibling of `node`
    instance.root = node.root
    instance.parent = node.parent
    instance.level = node.level
    table.insert(node.parent.nodes, instance)
  end
  return setmetatable(instance, ElementNode.mt)
end

-- Return the slice of the root text from the start of this node's opening
-- part to the end of its closing part.
function ElementNode:gettext()
  return string.sub(self.root._text, self._openstart, self._closeend)
end

-- Return the slice of the root text strictly between this node's opening and
-- closing parts.
function ElementNode:getcontent()
  return string.sub(self.root._text, self._openend + 1, self._closestart - 1)
end

-- Store attribute `k` with value `v`. An "id" attribute (any letter case) also
-- sets self.id; a "class" attribute (any letter case) also appends each
-- whitespace-separated token of `v` to self.classes.
function ElementNode:addattribute(k, v)
  self.attributes[k] = v
  if string.lower(k) == "id" then
    self.id = v
  end
  -- class attribute contains "space-separated tokens", each of which we'd like quick access to
  if string.lower(k) == "class" then
    for class in string.gmatch(v, "%S+") do
      table.insert(self.classes, class)
    end
  end
end

-- Add `node` to the Set stored under index[name], creating the Set on first use.
local function insert(index, name, node)
  index[name] = index[name] or Set:new()
  index[name]:add(node)
end

-- Mark this node as closed. When both positions are given they replace the
-- closing positions recorded at construction. The node is then registered in
-- the lookup tables of every ancestor.
function ElementNode:close(closestart, closeend)
  if closestart and closeend then
    self._closestart, self._closeend = closestart, closeend
  end
  -- inform higher level nodes about this element's existence in their branches
  local node = self.parent
  while node do
    node.deepernodes:add(self)
    insert(node.deeperelements, self.name, self)
    for k in pairs(self.attributes) do
      insert(node.deeperattributes, k, self)
    end
    if self.id then
      insert(node.deeperids, self.id, self)
    end
    for _, v in ipairs(self.classes) do
      insert(node.deeperclasses, v, self)
    end
    node = node.parent
  end
end

-- Escape all ^, $, (, ), %, ., [, ], *, +, - , and ? with a % prefix so that
-- `s` can be embedded literally in a Lua pattern.
-- The parentheses truncate string.gsub's two results to the string alone;
-- otherwise the replacement count leaks into the caller's argument list.
local function escape(s)
  return (string.gsub(s, "([%^%$%(%)%%%.%[%]%*%+%-%?])", "%%" .. "%1"))
end

-- Strip one pair of matching single or double quotes from `v`; a value that is
-- not quoted is returned unchanged.
local function unquote(v)
  local first = string.sub(v, 1, 1)
  if #v >= 2 and (first == '"' or first == "'") and string.sub(v, -1) == first then
    return string.sub(v, 2, #v - 1)
  end
  return v
end

-- Select nodes below `self` using a CSS-like selector string `s`.
-- Parts are separated by whitespace:
--   ">"            the next part only considers direct children
--   "*"            every node in scope
--   name, #id, .class, [attr], [attr="v"]  may be combined within one part;
--                  attribute operators: = != |= *= ~= ^= $=
--   :not(...)      excludes nodes matching the enclosed simple selector;
--                  any other ":" filter name is not interpreted
-- Returns a Set of matching nodes (empty when `s` is not a non-empty string or
-- contains no selecting part).
local function select_nodes(self, s)
  if not s or type(s) ~= "string" or s == "" then return Set:new() end
  local sets = {
    [""] = self.deeperelements,
    ["["] = self.deeperattributes,
    ["#"] = self.deeperids,
    ["."] = self.deeperclasses
  }

  -- Resolve one simple selector: `t` is its type prefix, `w` the remaining text.
  local function match(t, w)
    local m, v
    if t == "[" then
      w, m, v = string.match(w,
        "([^=|%*~%$!%^]+)" .. -- w = 1 or more characters up to a possible "=", "|", "*", "~", "$", "!", or "^"
        "([|%*~%$!%^]?)" .. -- m = an optional "|", "*", "~", "$", "!", or "^", preceding the optional "="
        "=?" .. -- an optional uncaptured "="
        "(.*)" -- v = anything following the "=", or else ""
      )
    end
    local matched = Set:new(sets[t][w])
    -- attribute value selectors
    if v and v ~= "" then
      v = unquote(v)
      for node in pairs(matched) do
        local a = node.attributes[w]
        if m == "" then
          -- equals
          if a ~= v then matched:remove(node) end
        elseif m == "!" then
          -- not equals
          if a == v then matched:remove(node) end
        elseif m == "|" then
          -- prefix: everything before the first "-"
          if string.match(a, "^[^-]*") ~= v then matched:remove(node) end
        elseif m == "*" then
          -- contains (plain search, no pattern magic)
          if not string.find(a, v, 1, true) then matched:remove(node) end
        elseif m == "~" then
          -- word: one of the whitespace-separated tokens equals v
          local found = false
          for word in string.gmatch(a, "%S+") do
            if word == v then found = true break end
          end
          -- only ever remove here: re-adding a cleared key during pairs() is undefined
          if not found then matched:remove(node) end
        elseif m == "^" then
          -- starts with
          if string.match(a, "^" .. escape(v)) ~= v then matched:remove(node) end
        elseif m == "$" then
          -- ends with
          if string.match(a, escape(v) .. "$") ~= v then matched:remove(node) end
        end
      end
    end
    return matched
  end

  local subjects, resultset, childrenonly = Set:new({self}), nil, false
  for part in string.gmatch(s, "%S+") do
    if part == ">" then
      childrenonly = true
    else
      -- gather the candidates in scope of every current subject
      resultset = Set:new()
      for subject in pairs(subjects) do
        local candidates = subject.deepernodes
        if childrenonly then candidates = Set:new(subject.nodes) end
        resultset = resultset + candidates
      end
      -- reset only after ALL subjects have been restricted to their children
      childrenonly = false
      if part ~= "*" then
        local excludes, filter = Set:new(), nil
        for t, w in string.gmatch(part,
          "([:%[#.]?)" .. -- t = an optional :, [, #, or .
          "([^:%(%[#.%]%)]+)" .. -- w = 1 or more of anything not :, (, [, #, ., ], or )
          "%]?%)?" -- followed by an uncaptured optional ] and/or )
        ) do
          if t == ":" then
            -- remember the filter name; it applies to the next simple selector
            filter = w
          else
            local matched = match(t, w)
            if filter == "not" then
              excludes = excludes + matched
            else
              resultset = resultset * matched
            end
            filter = nil
          end
        end
        resultset = resultset - excludes
      end
      -- the matches of this part (including "*") are the scope of the next one
      subjects = Set:new(resultset)
    end
  end
  return resultset or Set:new()
end

-- Method form of the selector: node:select("div > p.intro")
function ElementNode:select(s)
  return select_nodes(self, s)
end

-- Nodes are callable: node("div > p.intro") is the same as node:select(...)
ElementNode.mt.__call = select_nodes

return ElementNode