First draft for a Rock setup

This commit is contained in:
Wouter Scherphof
2013-03-28 12:24:10 +01:00
parent 3af809df9a
commit 4a9fa0a790
8 changed files with 109 additions and 7 deletions

69
src/htmlparser.lua Normal file
View File

@@ -0,0 +1,69 @@
local ElementNode = require("htmlparser.ElementNode")
local voidelements = require("htmlparser.voidelements")
local HtmlParser = {}
local function parse(text)
local root = ElementNode:new(text)
local node, descend, tpos, opentags = root, true, 1, {}
while true do
local openstart, name
openstart, tpos, name = string.find(root._text,
"<" .. -- an uncaptured starting "<"
"(%w+)" .. -- name = the first word, directly following the "<"
"[^>]*>", -- include, but not capture everything up to the next ">"
tpos)
if not name then break end
local tag = ElementNode:new(name, node, descend, openstart, tpos)
node = tag
local tagst, apos = tag:gettext(), 1
while true do
local start, k, eq, quote, v
start, apos, k, eq, quote = string.find(tagst,
"%s+" .. -- some uncaptured space
"([^%s=]+)" .. -- k = an unspaced string up to an optional "="
"(=?)" .. -- eq = the optiona; "=", else ""
"(['\"]?)", -- quote = an optional "'" or '"' following the "=", or ""
apos)
if not k or k == "/>" then break end
if eq == "" then
v = ""
else
local pattern = "=([^%s>]*)"
if quote ~= '' then
pattern = quote .. "([^" .. quote .. "]*)" .. quote
end
start, apos, v = string.find(tagst, pattern, apos)
end
tag:addattribute(k, v)
end
if voidelements[string.lower(tag.name)] then
descend = false
tag:close()
else
opentags[tag.name] = opentags[tag.name] or {}
table.insert(opentags[tag.name], tag)
end
local closeend = tpos
while true do
local closestart, closing, closename
closestart, closeend, closing, closename = string.find(root._text, "[^<]*<(/?)(%w+)", closeend)
if not closing or closing == "" then break end
tag = table.remove(opentags[closename])
closestart = string.find(root._text, "<", closestart)
tag:close(closestart, closeend + 1)
node = tag.parent
descend = true
end
end
return root
end
HtmlParser.parse = parse
return HtmlParser

View File

@@ -0,0 +1,174 @@
require("luarocks.loader")
local Set = require("Set")
local ElementNode = {}
ElementNode.mt = {__index = ElementNode}
function ElementNode:new(nameortext, node, descend, openstart, openend)
local instance = {
name = nameortext,
level = 0,
parent = nil,
root = nil,
nodes = {},
_openstart = openstart, _openend = openend,
_closestart = openstart, _closeend = openend,
attributes = {},
id = nil,
classes = {},
deepernodes = Set:new(),
deeperelements = {}, deeperattributes = {}, deeperids = {}, deeperclasses = {}
}
if not node then
instance.name = "root"
instance.root = instance
instance._text = nameortext
local length = string.len(nameortext)
instance._openstart, instance._openend = 1, length
instance._closestart, instance._closeend = 1, length
elseif descend then
instance.root = node.root
instance.parent = node
instance.level = node.level + 1
table.insert(node.nodes, instance)
else
instance.root = node.root
instance.parent = node.parent
instance.level = node.level
table.insert(node.parent.nodes, instance)
end
return setmetatable(instance, ElementNode.mt)
end
function ElementNode:gettext()
return string.sub(self.root._text, self._openstart, self._closeend)
end
function ElementNode:getcontent()
return string.sub(self.root._text, self._openend + 1, self._closestart - 1)
end
function ElementNode:addattribute(k, v)
self.attributes[k] = v
if string.lower(k) == "id" then
self.id = v
end
-- class attribute contains "space-separated tokens", each of which we'd like quick access to
if string.lower(k) == "class" then
for class in string.gmatch(v, "%S+") do
table.insert(self.classes, class)
end
end
end
local function insert(table, name, node)
table[name] = table[name] or Set:new()
table[name]:add(node)
end
function ElementNode:close(closestart, closeend)
if closestart and closeend then
self._closestart, self._closeend = closestart, closeend
end
-- inform hihger level nodes about this element's existence in their branches
local node = self
while true do
node = node.parent
if not node then break end
node.deepernodes:add(self)
insert(node.deeperelements, self.name, self)
for k in pairs(self.attributes) do
insert(node.deeperattributes, k, self)
end
if self.id then
insert(node.deeperids, self.id, self)
end
for _,v in ipairs(self.classes) do
insert(node.deeperclasses, v, self)
end
end
end
local function escape(s)
-- escape all ^, $, (, ), %, ., [, ], *, +, - , and ? with a % prefix
return string.gsub(s, "([%^%$%(%)%%%.%[%]%*%+%-%?])", "%%" .. "%1")
end
local function select(self, s)
if not s or type(s) ~= "string" or s == "" then return Set:new() end
local sets = {[""] = self.deeperelements, ["["] = self.deeperattributes,
["#"] = self.deeperids, ["."] = self.deeperclasses}
local function match(t, w)
local m, v
if t == "[" then w, m, v = string.match(w,
"([^=|%*~%$!%^]+)" .. -- w = 1 or more characters up to a possible "=", "|", "*", "~", "$", "!", or "^"
"([|%*~%$!%^]?)" .. -- m = an optional "|", "*", "~", "$", "!", or "^", preceding the optional "="
"=?" .. -- an optional uncaptured "="
"(.*)" -- v = anything following the "=", or else ""
)
end
local matched = Set:new(sets[t][w])
-- attribute value selectors
if v and v ~= "" then
v = string.sub(v, 2, #v - 1) -- strip quotes
for node in pairs(matched) do
local a = node.attributes[w]
-- equals
if m == "" and a ~= v then matched:remove(node)
-- not equals
elseif m == "!" and a == v then matched:remove(node)
-- prefix
elseif m =="|" and string.match(a, "^[^-]*") ~= v then matched:remove(node)
-- contains
elseif m =="*" and string.match(a, escape(v)) ~= v then matched:remove(node)
-- word
elseif m =="~" then matched:remove(node)
for word in string.gmatch(a, "%S+") do
if word == v then matched:add(node) break end
end
-- starts with
elseif m =="^" and string.match(a, "^" .. escape(v)) ~= v then matched:remove(node)
-- ends with
elseif m =="$" and string.match(a, escape(v) .. "$") ~= v then matched:remove(node)
end
end -- for node
end -- if v
return matched
end
local subjects, resultset, childrenonly = Set:new({self})
for part in string.gmatch(s, "%S+") do
if part == ">" then childrenonly = true goto nextpart end
resultset = Set:new()
for subject in pairs(subjects) do
local star = subject.deepernodes
if childrenonly then star = Set:new(subject.nodes) childrenonly = false end
resultset = resultset + star
end
if part == "*" then goto nextpart end
local excludes, filter = Set:new()
for t, w in string.gmatch(part,
"([:%[#.]?)" .. -- t = an optional :, [, #, or .
"([^:%(%[#.%]%)]+)" .. -- w = 1 or more of anything not :, (, [, #, ., ], or )
"%]?%)?" -- followed by an uncaptured optional ] and/or )
) do
if t == ":" then filter = w goto nextw end
local matched = match(t, w)
if filter == "not" then
excludes = excludes + matched
else
resultset = resultset * matched
end
filter = nil
::nextw::
end
resultset = resultset - excludes
subjects = Set:new(resultset)
::nextpart::
end
return resultset
end
function ElementNode:select(s) return select(self, s) end
ElementNode.mt.__call = select
return ElementNode

View File

@@ -0,0 +1,18 @@
return {
area = true,
base = true,
br = true,
col = true,
command = true,
embed = true,
hr = true,
img = true,
input = true,
keygen = true,
link = true,
meta = true,
param = true,
source = true,
track = true,
wbr = true
}