mirror of
https://github.com/TangentFoxy/.lua-files.git
synced 2024-11-20 21:34:23 +00:00
added dependencies of make-epub and fixed it :D
This commit is contained in:
parent
defbee8e5c
commit
3ee0adbd48
272
htmlparser.lua
Normal file
272
htmlparser.lua
Normal file
@ -0,0 +1,272 @@
|
|||||||
|
-- vim: ft=lua ts=2 sw=2
|
||||||
|
|
||||||
|
-- Syntactic Sugar {{{
|
||||||
|
local function rine(val) -- Return (val) If it's Not Empty (non-zero-length)
|
||||||
|
return (val and #val>0) and val
|
||||||
|
end
|
||||||
|
local function rit(a) -- Return (a) If it's Table
|
||||||
|
return (type(a) == "table") and a
|
||||||
|
end
|
||||||
|
local noop = function() end
|
||||||
|
local esc = function(s) return string.gsub(s, "([%^%$%(%)%%%.%[%]%*%+%-%?])", "%%" .. "%1") end
|
||||||
|
local str = tostring
|
||||||
|
local char = string.char
|
||||||
|
local opts = rit(htmlparser_opts) or {} -- needed for silent/noerr/noout/nonl directives, also needed to be defined before `require` in such case
|
||||||
|
local prn = opts.silent and noop or function(l,f,...)
|
||||||
|
local fd = (l=="i") and "stdout" or "stderr"
|
||||||
|
local t = (" [%s] "):format(l:upper())
|
||||||
|
io[fd]
|
||||||
|
:write('[HTMLParser]'..t..f:format(...)
|
||||||
|
..(opts.nonl or "\n")
|
||||||
|
)
|
||||||
|
end
|
||||||
|
local err = opts.noerr and noop or function(f,...) prn("e",f,...) end
|
||||||
|
local out = opts.noout and noop or function(f,...) prn("i",f,...) end
|
||||||
|
local line = debug and function(lvl) return debug.getinfo(lvl or 2).currentline end or noop
|
||||||
|
local dbg = opts.debug and function(f,...) prn("d",f:gsub("#LINE#",str(line(3))),...) end or noop
|
||||||
|
-- }}}
|
||||||
|
-- Requires {{{
|
||||||
|
|
||||||
|
-- MODIFIED --
|
||||||
|
|
||||||
|
-- local ElementNode = require"htmlparser.ElementNode"
|
||||||
|
-- local voidelements = require"htmlparser.voidelements"
|
||||||
|
local success, utility = pcall(function()
|
||||||
|
return dofile(arg[0]:match("@?(.*/)") or arg[0]:match("@?(.*\\)") .. "utility-functions.lua")
|
||||||
|
end)
|
||||||
|
if not success then
|
||||||
|
print("\n\n" .. tostring(utility))
|
||||||
|
error("\n\nThis script may be installed improperly. Follow instructions at:\n\thttps://github.com/TangentFoxy/.lua-files#installation\n")
|
||||||
|
end
|
||||||
|
|
||||||
|
local ElementNode = utility.require("htmlparser/ElementNode")
|
||||||
|
local voidelements = utility.require("htmlparser/voidelements")
|
||||||
|
|
||||||
|
-- MODIFIED --
|
||||||
|
|
||||||
|
--}}}
|
||||||
|
local HtmlParser = {}
|
||||||
|
local function parse(text,limit) -- {{{
|
||||||
|
local opts = rine(opts) -- use top-level opts-table (the one, defined before requiring the module), if exists
|
||||||
|
or rit(htmlparser_opts) -- or defined after requiring (but before calling `parse`)
|
||||||
|
or {} -- fallback otherwise
|
||||||
|
opts.looplimit = opts.looplimit or htmlparser_looplimit
|
||||||
|
|
||||||
|
local text = str(text)
|
||||||
|
local limit = limit or opts.looplimit or 1000
|
||||||
|
local tpl = false
|
||||||
|
|
||||||
|
if not opts.keep_comments then -- Strip (or not) comments {{{
|
||||||
|
text = text:gsub("<!%-%-.-%-%->","") -- Many chances commented code will have syntax errors, that'll lead to parser failures
|
||||||
|
end -- }}}
|
||||||
|
|
||||||
|
local tpr={}
|
||||||
|
|
||||||
|
if not opts.keep_danger_placeholders then -- {{{ little speedup by cost of potential parsing breakages
|
||||||
|
-- search unused "invalid" bytes {{{
|
||||||
|
local busy,i={},0;
|
||||||
|
repeat -- {{{
|
||||||
|
local cc = char(i)
|
||||||
|
if not(text:match(cc)) then -- {{{
|
||||||
|
if not(tpr["<"]) or not(tpr[">"]) then -- {{{
|
||||||
|
if not(busy[i]) then -- {{{
|
||||||
|
if not(tpr["<"]) then -- {{{
|
||||||
|
tpr["<"] = cc;
|
||||||
|
elseif not(tpr[">"]) then
|
||||||
|
tpr[">"] = cc;
|
||||||
|
end -- }}}
|
||||||
|
busy[i] = true
|
||||||
|
dbg("c:{%s}||cc:{%d}||tpr[c]:{%s}",str(c),cc:byte(),str(tpr[c]))
|
||||||
|
dbg("busy[i]:{%s},i:{%d}",str(busy[i]),i)
|
||||||
|
dbg("[FindPH]:#LINE# Success! || i=%d",i)
|
||||||
|
else -- if !busy
|
||||||
|
dbg("[FindPH]:#LINE# Busy! || i=%d",i)
|
||||||
|
end -- if !busy -- }}}
|
||||||
|
dbg("c:{%s}||cc:{%d}||tpr[c]:{%s}",c,cc:byte(),str(tpr[c]))
|
||||||
|
dbg("%s",str(busy[i]))
|
||||||
|
else -- if < or >
|
||||||
|
dbg("[FindPH]:#LINE# Done!",i)
|
||||||
|
break
|
||||||
|
end -- if < or > -- }}}
|
||||||
|
else -- text!match(cc)
|
||||||
|
dbg("[FindPH]:#LINE# Text contains this byte! || i=%d",i)
|
||||||
|
end -- text!match(cc) -- }}}
|
||||||
|
local skip=1
|
||||||
|
if i==31 then
|
||||||
|
skip=96 -- ASCII
|
||||||
|
end
|
||||||
|
i=i+skip
|
||||||
|
until (i==255) -- }}}
|
||||||
|
i=nil
|
||||||
|
--- }}}
|
||||||
|
|
||||||
|
if not(tpr["<"]) or not(tpr[">"]) then
|
||||||
|
err("Impossible to find at least two unused byte codes in this HTML-code. We need it to escape bracket-contained placeholders inside tags.")
|
||||||
|
err("Consider enabling 'keep_danger_placeholders' option (to silence this error, if parser wasn't failed with current HTML-code) or manually replace few random bytes, to free up the codes.")
|
||||||
|
else
|
||||||
|
dbg("[FindPH]:#LINE# Found! || '<'=%d, '>'=%d",tpr["<"]:byte(),tpr[">"]:byte())
|
||||||
|
end
|
||||||
|
|
||||||
|
-- dbg("tpr[>] || tpr[] || #busy%d")
|
||||||
|
|
||||||
|
-- g {{{
|
||||||
|
local function g(id,...)
|
||||||
|
local arg={...}
|
||||||
|
local orig=arg[id]
|
||||||
|
arg[id]=arg[id]:gsub("(.)",tpr)
|
||||||
|
if arg[id] ~= orig then
|
||||||
|
tpl=true
|
||||||
|
dbg("[g]:#LINE# orig: %s", str(orig))
|
||||||
|
dbg("[g]:#LINE# replaced: %s",str(arg[id]))
|
||||||
|
end
|
||||||
|
dbg("[g]:#LINE# called, id: %s, arg[id]: %s, args { "..(("{%s}, "):rep(#arg):gsub(", $","")).." }",id,arg[id],...)
|
||||||
|
dbg("[g]:#LINE# concat(arg): %s",table.concat(arg))
|
||||||
|
return table.concat(arg)
|
||||||
|
end
|
||||||
|
-- g }}}
|
||||||
|
|
||||||
|
-- tpl-placeholders and attributes {{{
|
||||||
|
text=text
|
||||||
|
:gsub(
|
||||||
|
"(=[%s]-)".. -- only match attr.values, and not random strings between two random apostrophs
|
||||||
|
"(%b'')",
|
||||||
|
function(...)return g(2,...)end
|
||||||
|
)
|
||||||
|
:gsub(
|
||||||
|
"(=[%s]-)".. -- same for "
|
||||||
|
'(%b"")',
|
||||||
|
function(...)return g(2,...)end
|
||||||
|
) -- Escape "<"/">" inside attr.values (see issue #50)
|
||||||
|
:gsub(
|
||||||
|
"(<".. -- Match "<",
|
||||||
|
(opts.tpl_skip_pattern or "[^!]").. -- with exclusion pattern (for example, to ignore comments, which aren't template placeholders, but can legally contain "<"/">" inside.
|
||||||
|
")([^>]+)".. -- If matched, we want to escape '<'s if we meet them inside tag
|
||||||
|
"(>)",
|
||||||
|
function(...)return g(2,...)end
|
||||||
|
)
|
||||||
|
:gsub(
|
||||||
|
"("..
|
||||||
|
(tpr["<"] or "__FAILED__").. -- Here we search for "<", we escaped in previous gsub (and don't break things if we have no escaping replacement)
|
||||||
|
")("..
|
||||||
|
(opts.tpl_marker_pattern or "[^%w%s]").. -- Capture templating symbol
|
||||||
|
")([%g%s]-)".. -- match placeholder's content
|
||||||
|
"(%2)(>)".. -- placeholder's tail
|
||||||
|
"([^>]*>)", -- remainings
|
||||||
|
function(...)return g(5,...)end
|
||||||
|
)
|
||||||
|
-- }}}
|
||||||
|
end -- }}}
|
||||||
|
|
||||||
|
local index = 0
|
||||||
|
local root = ElementNode:new(index, str(text))
|
||||||
|
local node, descend, tpos, opentags = root, true, 1, {}
|
||||||
|
|
||||||
|
while true do -- MainLoop {{{
|
||||||
|
if index == limit then -- {{{
|
||||||
|
err("Main loop reached loop limit (%d). Consider either increasing it or checking HTML-code for syntax errors", limit)
|
||||||
|
break
|
||||||
|
end -- }}}
|
||||||
|
-- openstart/tpos Definitions {{{
|
||||||
|
local openstart, name
|
||||||
|
openstart, tpos, name = root._text:find(
|
||||||
|
"<" .. -- an uncaptured starting "<"
|
||||||
|
"([%w-]+)" .. -- name = the first word, directly following the "<"
|
||||||
|
"[^>]*>", -- include, but not capture everything up to the next ">"
|
||||||
|
tpos)
|
||||||
|
dbg("[MainLoop]:#LINE# openstart=%s || tpos=%s || name=%s",str(openstart),str(tpos),str(name))
|
||||||
|
-- }}}
|
||||||
|
if not name then break end
|
||||||
|
-- Some more vars {{{
|
||||||
|
index = index + 1
|
||||||
|
local tag = ElementNode:new(index, str(name), (node or {}), descend, openstart, tpos)
|
||||||
|
node = tag
|
||||||
|
local tagloop
|
||||||
|
local tagst, apos = tag:gettext(), 1
|
||||||
|
-- }}}
|
||||||
|
while true do -- TagLoop {{{
|
||||||
|
dbg("[TagLoop]:#LINE# tag.name=%s, tagloop=%s",str(tag.name),str(tagloop))
|
||||||
|
if tagloop == limit then -- {{{
|
||||||
|
err("Tag parsing loop reached loop limit (%d). Consider either increasing it or checking HTML-code for syntax errors", limit)
|
||||||
|
break
|
||||||
|
end -- }}}
|
||||||
|
-- Attrs {{{
|
||||||
|
local start, k, eq, quote, v, zsp
|
||||||
|
start, apos, k, zsp, eq, zsp, quote = tagst:find(
|
||||||
|
"%s+" .. -- some uncaptured space
|
||||||
|
"([^%s=/>]+)" .. -- k = an unspaced string up to an optional "=" or the "/" or ">"
|
||||||
|
"([%s]-)".. -- zero or more spaces
|
||||||
|
"(=?)" .. -- eq = the optional; "=", else ""
|
||||||
|
"([%s]-)".. -- zero or more spaces
|
||||||
|
[=[(['"]?)]=], -- quote = an optional "'" or '"' following the "=", or ""
|
||||||
|
apos)
|
||||||
|
dbg("[TagLoop]:#LINE# start=%s || apos=%s || k=%s || zsp='%s' || eq='%s', quote=[%s]",str(start),str(apos),str(k),str(zsp),str(eq),str(quote))
|
||||||
|
-- }}}
|
||||||
|
if not k or k == "/>" or k == ">" then break end
|
||||||
|
-- Pattern {{{
|
||||||
|
if eq == "=" then
|
||||||
|
local pattern = "=([^%s>]*)"
|
||||||
|
if quote ~= "" then
|
||||||
|
pattern = quote .. "([^" .. quote .. "]*)" .. quote
|
||||||
|
end
|
||||||
|
start, apos, v = tagst:find(pattern, apos)
|
||||||
|
dbg("[TagLoop]:#LINE# start=%s || apos=%s || v=%s || pattern=%s",str(start),str(apos),str(v),str(pattern))
|
||||||
|
end
|
||||||
|
-- }}}
|
||||||
|
v=v or ""
|
||||||
|
if tpl then -- {{{
|
||||||
|
for rk,rv in pairs(tpr) do
|
||||||
|
v = v:gsub(rv,rk)
|
||||||
|
dbg("[TagLoop]:#LINE# rv=%s || rk=%s",str(rv),str(rk))
|
||||||
|
end
|
||||||
|
end -- }}}
|
||||||
|
|
||||||
|
dbg("[TagLoop]:#LINE# k=%s || v=%s",str(k),str(v))
|
||||||
|
tag:addattribute(k, v)
|
||||||
|
tagloop = (tagloop or 0) + 1
|
||||||
|
end
|
||||||
|
-- }}}
|
||||||
|
if voidelements[tag.name:lower()] then -- {{{
|
||||||
|
descend = false
|
||||||
|
tag:close()
|
||||||
|
else
|
||||||
|
descend = true
|
||||||
|
opentags[tag.name] = opentags[tag.name] or {}
|
||||||
|
table.insert(opentags[tag.name], tag)
|
||||||
|
end
|
||||||
|
-- }}}
|
||||||
|
local closeend = tpos
|
||||||
|
local closingloop
|
||||||
|
while true do -- TagCloseLoop {{{
|
||||||
|
-- Can't remember why did I add that, so comment it for now (and not remove), in case it will be needed again
|
||||||
|
-- (although, it causes #59 and #60, so it will anyway be needed to rework)
|
||||||
|
-- if voidelements[tag.name:lower()] then break end -- already closed
|
||||||
|
if closingloop == limit then
|
||||||
|
err("Tag closing loop reached loop limit (%d). Consider either increasing it or checking HTML-code for syntax errors", limit)
|
||||||
|
break
|
||||||
|
end
|
||||||
|
|
||||||
|
local closestart, closing, closename
|
||||||
|
closestart, closeend, closing, closename = root._text:find("[^<]*<(/?)([%w-]+)", closeend)
|
||||||
|
dbg("[TagCloseLoop]:#LINE# closestart=%s || closeend=%s || closing=%s || closename=%s",str(closestart),str(closeend),str(closing),str(closename))
|
||||||
|
|
||||||
|
if not closing or closing == "" then break end
|
||||||
|
|
||||||
|
tag = table.remove(opentags[closename] or {}) or tag -- kludges for the cases of closing void or non-opened tags
|
||||||
|
closestart = root._text:find("<", closestart)
|
||||||
|
dbg("[TagCloseLoop]:#LINE# closestart=%s",str(closestart))
|
||||||
|
tag:close(closestart, closeend + 1)
|
||||||
|
node = tag.parent
|
||||||
|
descend = true
|
||||||
|
closingloop = (closingloop or 0) + 1
|
||||||
|
end -- }}}
|
||||||
|
end -- }}}
|
||||||
|
if tpl then -- {{{
|
||||||
|
dbg("tpl")
|
||||||
|
for k,v in pairs(tpr) do
|
||||||
|
root._text = root._text:gsub(v,k)
|
||||||
|
end
|
||||||
|
end -- }}}
|
||||||
|
return root
|
||||||
|
end -- }}}
|
||||||
|
HtmlParser.parse = parse
|
||||||
|
return HtmlParser
|
283
htmlparser/ElementNode.lua
Normal file
283
htmlparser/ElementNode.lua
Normal file
@ -0,0 +1,283 @@
|
|||||||
|
-- vim: ft=lua ts=2
|
||||||
|
local Set = {}
|
||||||
|
Set.mt = {__index = Set}
|
||||||
|
function Set:new(values)
|
||||||
|
local instance = {}
|
||||||
|
local isSet if getmetatable(values) == Set.mt then isSet = true end
|
||||||
|
if type(values) == "table" then
|
||||||
|
if not isSet and #values > 0 then
|
||||||
|
for _,v in ipairs(values) do
|
||||||
|
instance[v] = true
|
||||||
|
end
|
||||||
|
else
|
||||||
|
for k in pairs(values) do
|
||||||
|
instance[k] = true
|
||||||
|
end
|
||||||
|
end
|
||||||
|
elseif values ~= nil then
|
||||||
|
instance = {[values] = true}
|
||||||
|
end
|
||||||
|
return setmetatable(instance, Set.mt)
|
||||||
|
end
|
||||||
|
|
||||||
|
function Set:add(e)
|
||||||
|
if e ~= nil then self[e] = true end
|
||||||
|
return self
|
||||||
|
end
|
||||||
|
|
||||||
|
function Set:remove(e)
|
||||||
|
if e ~= nil then self[e] = nil end
|
||||||
|
return self
|
||||||
|
end
|
||||||
|
|
||||||
|
function Set:tolist()
|
||||||
|
local res = {}
|
||||||
|
for k in pairs(self) do
|
||||||
|
table.insert(res, k)
|
||||||
|
end
|
||||||
|
return res
|
||||||
|
end
|
||||||
|
|
||||||
|
Set.mt.__add = function (a, b)
|
||||||
|
local res, a, b = Set:new(), Set:new(a), Set:new(b)
|
||||||
|
for k in pairs(a) do res[k] = true end
|
||||||
|
for k in pairs(b) do res[k] = true end
|
||||||
|
return res
|
||||||
|
end
|
||||||
|
|
||||||
|
-- Subtraction
|
||||||
|
Set.mt.__sub = function (a, b)
|
||||||
|
local res, a, b = Set:new(), Set:new(a), Set:new(b)
|
||||||
|
for k in pairs(a) do res[k] = true end
|
||||||
|
for k in pairs(b) do res[k] = nil end
|
||||||
|
return res
|
||||||
|
end
|
||||||
|
|
||||||
|
-- Intersection
|
||||||
|
Set.mt.__mul = function (a, b)
|
||||||
|
local res, a, b = Set:new(), Set:new(a), Set:new(b)
|
||||||
|
for k in pairs(a) do
|
||||||
|
res[k] = b[k]
|
||||||
|
end
|
||||||
|
return res
|
||||||
|
end
|
||||||
|
|
||||||
|
-- String representation
|
||||||
|
Set.mt.__tostring = function (set)
|
||||||
|
local s = "{"
|
||||||
|
local sep = ""
|
||||||
|
for k in pairs(set) do
|
||||||
|
s = s .. sep .. tostring(k)
|
||||||
|
sep = ", "
|
||||||
|
end
|
||||||
|
return s .. "}"
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
local ElementNode = {}
|
||||||
|
ElementNode.mt = {__index = ElementNode}
|
||||||
|
function ElementNode:new(index, nameortext, node, descend, openstart, openend)
|
||||||
|
local instance = {
|
||||||
|
index = index,
|
||||||
|
name = nameortext,
|
||||||
|
level = 0,
|
||||||
|
parent = nil,
|
||||||
|
root = nil,
|
||||||
|
nodes = {},
|
||||||
|
_openstart = openstart, _openend = openend,
|
||||||
|
_closestart = openstart, _closeend = openend,
|
||||||
|
attributes = {},
|
||||||
|
id = nil,
|
||||||
|
classes = {},
|
||||||
|
deepernodes = Set:new(),
|
||||||
|
deeperelements = {}, deeperattributes = {}, deeperids = {}, deeperclasses = {}
|
||||||
|
}
|
||||||
|
if not node then
|
||||||
|
instance.name = "root"
|
||||||
|
instance.root = instance
|
||||||
|
instance._text = nameortext
|
||||||
|
local length = string.len(nameortext)
|
||||||
|
instance._openstart, instance._openend = 1, length
|
||||||
|
instance._closestart, instance._closeend = 1, length
|
||||||
|
elseif descend then
|
||||||
|
instance.root = node.root
|
||||||
|
instance.parent = node
|
||||||
|
instance.level = node.level + 1
|
||||||
|
table.insert(node.nodes, instance)
|
||||||
|
else
|
||||||
|
instance.root = node.root
|
||||||
|
instance.parent = node.parent or node --XXX: adds some safety but needs more testing for heisenbugs in corner cases
|
||||||
|
instance.level = node.level
|
||||||
|
table.insert((node.parent and node.parent.nodes or node.nodes), instance) --XXX: see above about heisenbugs
|
||||||
|
end
|
||||||
|
return setmetatable(instance, ElementNode.mt)
|
||||||
|
end
|
||||||
|
|
||||||
|
function ElementNode:gettext()
|
||||||
|
return string.sub(self.root._text, self._openstart, self._closeend)
|
||||||
|
end
|
||||||
|
|
||||||
|
function ElementNode:settext(c)
|
||||||
|
self.root._text=c
|
||||||
|
end
|
||||||
|
|
||||||
|
function ElementNode:textonly()
|
||||||
|
return (self:gettext():gsub("<[^>]*>",""))
|
||||||
|
end
|
||||||
|
|
||||||
|
function ElementNode:getcontent()
|
||||||
|
return string.sub(self.root._text, self._openend + 1, self._closestart - 1)
|
||||||
|
end
|
||||||
|
|
||||||
|
function ElementNode:addattribute(k, v)
|
||||||
|
self.attributes[k] = v
|
||||||
|
if string.lower(k) == "id" then
|
||||||
|
self.id = v
|
||||||
|
-- class attribute contains "space-separated tokens", each of which we'd like quick access to
|
||||||
|
elseif string.lower(k) == "class" then
|
||||||
|
for class in string.gmatch(v, "%S+") do
|
||||||
|
table.insert(self.classes, class)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
local function insert(table, name, node)
|
||||||
|
table[name] = table[name] or Set:new()
|
||||||
|
table[name]:add(node)
|
||||||
|
end
|
||||||
|
|
||||||
|
function ElementNode:close(closestart, closeend)
|
||||||
|
if closestart and closeend then
|
||||||
|
self._closestart, self._closeend = closestart, closeend
|
||||||
|
end
|
||||||
|
-- inform hihger level nodes about this element's existence in their branches
|
||||||
|
local node = self
|
||||||
|
while true do
|
||||||
|
node = node.parent
|
||||||
|
if not node then break end
|
||||||
|
node.deepernodes:add(self)
|
||||||
|
insert(node.deeperelements, self.name, self)
|
||||||
|
for k in pairs(self.attributes) do
|
||||||
|
insert(node.deeperattributes, k, self)
|
||||||
|
end
|
||||||
|
if self.id then
|
||||||
|
insert(node.deeperids, self.id, self)
|
||||||
|
end
|
||||||
|
for _,v in ipairs(self.classes) do
|
||||||
|
insert(node.deeperclasses, v, self)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
local function escape(s)
|
||||||
|
-- escape all ^, $, (, ), %, ., [, ], *, +, - , and ? with a % prefix
|
||||||
|
return string.gsub(s, "([%^%$%(%)%%%.%[%]%*%+%-%?])", "%%" .. "%1")
|
||||||
|
end
|
||||||
|
|
||||||
|
local function select(self, s)
|
||||||
|
if not s or type(s) ~= "string" or s == "" then return Set:new() end
|
||||||
|
local sets = {[""] = self.deeperelements, ["["] = self.deeperattributes,
|
||||||
|
["#"] = self.deeperids, ["."] = self.deeperclasses}
|
||||||
|
local function match(t, w)
|
||||||
|
local m, e, v
|
||||||
|
if t == "[" then w, m, e, v = string.match(w,
|
||||||
|
"([^=|%*~%$!%^]+)" .. -- w = 1 or more characters up to a possible "=", "|", "*", "~", "$", "!", or "^"
|
||||||
|
"([|%*~%$!%^]?)" .. -- m = an optional "|", "*", "~", "$", "!", or "^", preceding the optional "="
|
||||||
|
"(=?)" .. -- e = the optional "="
|
||||||
|
"(.*)" -- v = anything following the "=", or else ""
|
||||||
|
)
|
||||||
|
end
|
||||||
|
local matched = Set:new(sets[t][w])
|
||||||
|
-- attribute value selectors
|
||||||
|
if e == "=" then
|
||||||
|
if #v < 2 then v = "'" .. v .. "'" end -- values should be quoted
|
||||||
|
v = string.sub(v, 2, #v - 1) -- strip quotes
|
||||||
|
if m == "!" then matched = Set:new(self.deepernodes) end -- include those without that attribute
|
||||||
|
for node in pairs(matched) do
|
||||||
|
local a = node.attributes[w]
|
||||||
|
-- equals
|
||||||
|
if m == "" and a ~= v then matched:remove(node)
|
||||||
|
-- not equals
|
||||||
|
elseif m == "!" and a == v then matched:remove(node)
|
||||||
|
-- prefix
|
||||||
|
elseif m =="|" and string.match(a, "^[^-]*") ~= v then matched:remove(node)
|
||||||
|
-- contains
|
||||||
|
elseif m =="*" and string.match(a, escape(v)) ~= v then matched:remove(node)
|
||||||
|
-- word
|
||||||
|
elseif m =="~" then matched:remove(node)
|
||||||
|
for word in string.gmatch(a, "%S+") do
|
||||||
|
if word == v then matched:add(node) break end
|
||||||
|
end
|
||||||
|
-- starts with
|
||||||
|
elseif m =="^" and string.match(a, "^" .. escape(v)) ~= v then matched:remove(node)
|
||||||
|
-- ends with
|
||||||
|
elseif m =="$" and string.match(a, escape(v) .. "$") ~= v then matched:remove(node)
|
||||||
|
end
|
||||||
|
end -- for node
|
||||||
|
end -- if v
|
||||||
|
return matched
|
||||||
|
end
|
||||||
|
|
||||||
|
local subjects, resultset, childrenonly = Set:new({self})
|
||||||
|
for part in string.gmatch(s, "%S+") do
|
||||||
|
repeat
|
||||||
|
if part == ">" then childrenonly = true --[[goto nextpart]] break end
|
||||||
|
resultset = Set:new()
|
||||||
|
for subject in pairs(subjects) do
|
||||||
|
local star = subject.deepernodes
|
||||||
|
if childrenonly then star = Set:new(subject.nodes) end
|
||||||
|
resultset = resultset + star
|
||||||
|
end
|
||||||
|
childrenonly = false
|
||||||
|
if part == "*" then --[[goto nextpart]] break end
|
||||||
|
local excludes, filter = Set:new()
|
||||||
|
local start, pos = 0, 0
|
||||||
|
while true do
|
||||||
|
local switch, stype, name, eq, quote
|
||||||
|
start, pos, switch, stype, name, eq, quote = string.find(part,
|
||||||
|
"(%(?%)?)" .. -- switch = a possible ( or ) switching the filter on or off
|
||||||
|
"([:%[#.]?)" .. -- stype = a possible :, [, #, or .
|
||||||
|
"([%w-_\\]+)" .. -- name = 1 or more alfanumeric chars (+ hyphen, reverse slash and uderscore)
|
||||||
|
"([|%*~%$!%^]?=?)" .. -- eq = a possible |=, *=, ~=, $=, !=, ^=, or =
|
||||||
|
"(['\"]?)", -- quote = a ' or " delimiting a possible attribute value
|
||||||
|
pos + 1
|
||||||
|
)
|
||||||
|
if not name then break end
|
||||||
|
repeat
|
||||||
|
if ":" == stype then
|
||||||
|
filter = name
|
||||||
|
--[[goto nextname]] break
|
||||||
|
end
|
||||||
|
if ")" == switch then
|
||||||
|
filter = nil
|
||||||
|
end
|
||||||
|
if "[" == stype and "" ~= quote then
|
||||||
|
local value
|
||||||
|
start, pos, value = string.find(part, "(%b" .. quote .. quote .. ")]", pos)
|
||||||
|
name = name .. eq .. value
|
||||||
|
end
|
||||||
|
local matched = match(stype, name)
|
||||||
|
if filter == "not" then
|
||||||
|
excludes = excludes + matched
|
||||||
|
else
|
||||||
|
resultset = resultset * matched
|
||||||
|
end
|
||||||
|
--::nextname::
|
||||||
|
break
|
||||||
|
until true
|
||||||
|
end
|
||||||
|
resultset = resultset - excludes
|
||||||
|
subjects = Set:new(resultset)
|
||||||
|
--::nextpart::
|
||||||
|
break
|
||||||
|
until true
|
||||||
|
end
|
||||||
|
resultset = resultset:tolist()
|
||||||
|
table.sort(resultset, function (a, b) return a.index < b.index end)
|
||||||
|
return resultset
|
||||||
|
end
|
||||||
|
|
||||||
|
function ElementNode:select(s) return select(self, s) end
|
||||||
|
ElementNode.mt.__call = select
|
||||||
|
|
||||||
|
return ElementNode
|
19
htmlparser/voidelements.lua
Normal file
19
htmlparser/voidelements.lua
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
-- vim: ft=lua ts=2
|
||||||
|
return {
|
||||||
|
area = true,
|
||||||
|
base = true,
|
||||||
|
br = true,
|
||||||
|
col = true,
|
||||||
|
command = true,
|
||||||
|
embed = true,
|
||||||
|
hr = true,
|
||||||
|
img = true,
|
||||||
|
input = true,
|
||||||
|
keygen = true,
|
||||||
|
link = true,
|
||||||
|
meta = true,
|
||||||
|
param = true,
|
||||||
|
source = true,
|
||||||
|
track = true,
|
||||||
|
wbr = true
|
||||||
|
}
|
388
json.lua
Normal file
388
json.lua
Normal file
@ -0,0 +1,388 @@
|
|||||||
|
--
|
||||||
|
-- json.lua
|
||||||
|
--
|
||||||
|
-- Copyright (c) 2020 rxi
|
||||||
|
--
|
||||||
|
-- Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
|
-- this software and associated documentation files (the "Software"), to deal in
|
||||||
|
-- the Software without restriction, including without limitation the rights to
|
||||||
|
-- use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||||
|
-- of the Software, and to permit persons to whom the Software is furnished to do
|
||||||
|
-- so, subject to the following conditions:
|
||||||
|
--
|
||||||
|
-- The above copyright notice and this permission notice shall be included in all
|
||||||
|
-- copies or substantial portions of the Software.
|
||||||
|
--
|
||||||
|
-- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
-- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
-- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
-- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
-- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
-- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
-- SOFTWARE.
|
||||||
|
--
|
||||||
|
|
||||||
|
local json = { _version = "0.1.2" }
|
||||||
|
|
||||||
|
-------------------------------------------------------------------------------
|
||||||
|
-- Encode
|
||||||
|
-------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
local encode
|
||||||
|
|
||||||
|
local escape_char_map = {
|
||||||
|
[ "\\" ] = "\\",
|
||||||
|
[ "\"" ] = "\"",
|
||||||
|
[ "\b" ] = "b",
|
||||||
|
[ "\f" ] = "f",
|
||||||
|
[ "\n" ] = "n",
|
||||||
|
[ "\r" ] = "r",
|
||||||
|
[ "\t" ] = "t",
|
||||||
|
}
|
||||||
|
|
||||||
|
local escape_char_map_inv = { [ "/" ] = "/" }
|
||||||
|
for k, v in pairs(escape_char_map) do
|
||||||
|
escape_char_map_inv[v] = k
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
local function escape_char(c)
|
||||||
|
return "\\" .. (escape_char_map[c] or string.format("u%04x", c:byte()))
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
local function encode_nil(val)
|
||||||
|
return "null"
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
local function encode_table(val, stack)
|
||||||
|
local res = {}
|
||||||
|
stack = stack or {}
|
||||||
|
|
||||||
|
-- Circular reference?
|
||||||
|
if stack[val] then error("circular reference") end
|
||||||
|
|
||||||
|
stack[val] = true
|
||||||
|
|
||||||
|
if rawget(val, 1) ~= nil or next(val) == nil then
|
||||||
|
-- Treat as array -- check keys are valid and it is not sparse
|
||||||
|
local n = 0
|
||||||
|
for k in pairs(val) do
|
||||||
|
if type(k) ~= "number" then
|
||||||
|
error("invalid table: mixed or invalid key types")
|
||||||
|
end
|
||||||
|
n = n + 1
|
||||||
|
end
|
||||||
|
if n ~= #val then
|
||||||
|
error("invalid table: sparse array")
|
||||||
|
end
|
||||||
|
-- Encode
|
||||||
|
for i, v in ipairs(val) do
|
||||||
|
table.insert(res, encode(v, stack))
|
||||||
|
end
|
||||||
|
stack[val] = nil
|
||||||
|
return "[" .. table.concat(res, ",") .. "]"
|
||||||
|
|
||||||
|
else
|
||||||
|
-- Treat as an object
|
||||||
|
for k, v in pairs(val) do
|
||||||
|
if type(k) ~= "string" then
|
||||||
|
error("invalid table: mixed or invalid key types")
|
||||||
|
end
|
||||||
|
table.insert(res, encode(k, stack) .. ":" .. encode(v, stack))
|
||||||
|
end
|
||||||
|
stack[val] = nil
|
||||||
|
return "{" .. table.concat(res, ",") .. "}"
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
local function encode_string(val)
|
||||||
|
return '"' .. val:gsub('[%z\1-\31\\"]', escape_char) .. '"'
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
local function encode_number(val)
|
||||||
|
-- Check for NaN, -inf and inf
|
||||||
|
if val ~= val or val <= -math.huge or val >= math.huge then
|
||||||
|
error("unexpected number value '" .. tostring(val) .. "'")
|
||||||
|
end
|
||||||
|
return string.format("%.14g", val)
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
local type_func_map = {
|
||||||
|
[ "nil" ] = encode_nil,
|
||||||
|
[ "table" ] = encode_table,
|
||||||
|
[ "string" ] = encode_string,
|
||||||
|
[ "number" ] = encode_number,
|
||||||
|
[ "boolean" ] = tostring,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
encode = function(val, stack)
|
||||||
|
local t = type(val)
|
||||||
|
local f = type_func_map[t]
|
||||||
|
if f then
|
||||||
|
return f(val, stack)
|
||||||
|
end
|
||||||
|
error("unexpected type '" .. t .. "'")
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
function json.encode(val)
|
||||||
|
return ( encode(val) )
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
-------------------------------------------------------------------------------
|
||||||
|
-- Decode
|
||||||
|
-------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
local parse
|
||||||
|
|
||||||
|
local function create_set(...)
|
||||||
|
local res = {}
|
||||||
|
for i = 1, select("#", ...) do
|
||||||
|
res[ select(i, ...) ] = true
|
||||||
|
end
|
||||||
|
return res
|
||||||
|
end
|
||||||
|
|
||||||
|
local space_chars = create_set(" ", "\t", "\r", "\n")
|
||||||
|
local delim_chars = create_set(" ", "\t", "\r", "\n", "]", "}", ",")
|
||||||
|
local escape_chars = create_set("\\", "/", '"', "b", "f", "n", "r", "t", "u")
|
||||||
|
local literals = create_set("true", "false", "null")
|
||||||
|
|
||||||
|
local literal_map = {
|
||||||
|
[ "true" ] = true,
|
||||||
|
[ "false" ] = false,
|
||||||
|
[ "null" ] = nil,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
local function next_char(str, idx, set, negate)
|
||||||
|
for i = idx, #str do
|
||||||
|
if set[str:sub(i, i)] ~= negate then
|
||||||
|
return i
|
||||||
|
end
|
||||||
|
end
|
||||||
|
return #str + 1
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
local function decode_error(str, idx, msg)
|
||||||
|
local line_count = 1
|
||||||
|
local col_count = 1
|
||||||
|
for i = 1, idx - 1 do
|
||||||
|
col_count = col_count + 1
|
||||||
|
if str:sub(i, i) == "\n" then
|
||||||
|
line_count = line_count + 1
|
||||||
|
col_count = 1
|
||||||
|
end
|
||||||
|
end
|
||||||
|
error( string.format("%s at line %d col %d", msg, line_count, col_count) )
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
local function codepoint_to_utf8(n)
|
||||||
|
-- http://scripts.sil.org/cms/scripts/page.php?site_id=nrsi&id=iws-appendixa
|
||||||
|
local f = math.floor
|
||||||
|
if n <= 0x7f then
|
||||||
|
return string.char(n)
|
||||||
|
elseif n <= 0x7ff then
|
||||||
|
return string.char(f(n / 64) + 192, n % 64 + 128)
|
||||||
|
elseif n <= 0xffff then
|
||||||
|
return string.char(f(n / 4096) + 224, f(n % 4096 / 64) + 128, n % 64 + 128)
|
||||||
|
elseif n <= 0x10ffff then
|
||||||
|
return string.char(f(n / 262144) + 240, f(n % 262144 / 4096) + 128,
|
||||||
|
f(n % 4096 / 64) + 128, n % 64 + 128)
|
||||||
|
end
|
||||||
|
error( string.format("invalid unicode codepoint '%x'", n) )
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
local function parse_unicode_escape(s)
|
||||||
|
local n1 = tonumber( s:sub(1, 4), 16 )
|
||||||
|
local n2 = tonumber( s:sub(7, 10), 16 )
|
||||||
|
-- Surrogate pair?
|
||||||
|
if n2 then
|
||||||
|
return codepoint_to_utf8((n1 - 0xd800) * 0x400 + (n2 - 0xdc00) + 0x10000)
|
||||||
|
else
|
||||||
|
return codepoint_to_utf8(n1)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
local function parse_string(str, i)
|
||||||
|
local res = ""
|
||||||
|
local j = i + 1
|
||||||
|
local k = j
|
||||||
|
|
||||||
|
while j <= #str do
|
||||||
|
local x = str:byte(j)
|
||||||
|
|
||||||
|
if x < 32 then
|
||||||
|
decode_error(str, j, "control character in string")
|
||||||
|
|
||||||
|
elseif x == 92 then -- `\`: Escape
|
||||||
|
res = res .. str:sub(k, j - 1)
|
||||||
|
j = j + 1
|
||||||
|
local c = str:sub(j, j)
|
||||||
|
if c == "u" then
|
||||||
|
local hex = str:match("^[dD][89aAbB]%x%x\\u%x%x%x%x", j + 1)
|
||||||
|
or str:match("^%x%x%x%x", j + 1)
|
||||||
|
or decode_error(str, j - 1, "invalid unicode escape in string")
|
||||||
|
res = res .. parse_unicode_escape(hex)
|
||||||
|
j = j + #hex
|
||||||
|
else
|
||||||
|
if not escape_chars[c] then
|
||||||
|
decode_error(str, j - 1, "invalid escape char '" .. c .. "' in string")
|
||||||
|
end
|
||||||
|
res = res .. escape_char_map_inv[c]
|
||||||
|
end
|
||||||
|
k = j + 1
|
||||||
|
|
||||||
|
elseif x == 34 then -- `"`: End of string
|
||||||
|
res = res .. str:sub(k, j - 1)
|
||||||
|
return res, j + 1
|
||||||
|
end
|
||||||
|
|
||||||
|
j = j + 1
|
||||||
|
end
|
||||||
|
|
||||||
|
decode_error(str, i, "expected closing quote for string")
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
local function parse_number(str, i)
|
||||||
|
local x = next_char(str, i, delim_chars)
|
||||||
|
local s = str:sub(i, x - 1)
|
||||||
|
local n = tonumber(s)
|
||||||
|
if not n then
|
||||||
|
decode_error(str, i, "invalid number '" .. s .. "'")
|
||||||
|
end
|
||||||
|
return n, x
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
local function parse_literal(str, i)
|
||||||
|
local x = next_char(str, i, delim_chars)
|
||||||
|
local word = str:sub(i, x - 1)
|
||||||
|
if not literals[word] then
|
||||||
|
decode_error(str, i, "invalid literal '" .. word .. "'")
|
||||||
|
end
|
||||||
|
return literal_map[word], x
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
local function parse_array(str, i)
|
||||||
|
local res = {}
|
||||||
|
local n = 1
|
||||||
|
i = i + 1
|
||||||
|
while 1 do
|
||||||
|
local x
|
||||||
|
i = next_char(str, i, space_chars, true)
|
||||||
|
-- Empty / end of array?
|
||||||
|
if str:sub(i, i) == "]" then
|
||||||
|
i = i + 1
|
||||||
|
break
|
||||||
|
end
|
||||||
|
-- Read token
|
||||||
|
x, i = parse(str, i)
|
||||||
|
res[n] = x
|
||||||
|
n = n + 1
|
||||||
|
-- Next token
|
||||||
|
i = next_char(str, i, space_chars, true)
|
||||||
|
local chr = str:sub(i, i)
|
||||||
|
i = i + 1
|
||||||
|
if chr == "]" then break end
|
||||||
|
if chr ~= "," then decode_error(str, i, "expected ']' or ','") end
|
||||||
|
end
|
||||||
|
return res, i
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
local function parse_object(str, i)
|
||||||
|
local res = {}
|
||||||
|
i = i + 1
|
||||||
|
while 1 do
|
||||||
|
local key, val
|
||||||
|
i = next_char(str, i, space_chars, true)
|
||||||
|
-- Empty / end of object?
|
||||||
|
if str:sub(i, i) == "}" then
|
||||||
|
i = i + 1
|
||||||
|
break
|
||||||
|
end
|
||||||
|
-- Read key
|
||||||
|
if str:sub(i, i) ~= '"' then
|
||||||
|
decode_error(str, i, "expected string for key")
|
||||||
|
end
|
||||||
|
key, i = parse(str, i)
|
||||||
|
-- Read ':' delimiter
|
||||||
|
i = next_char(str, i, space_chars, true)
|
||||||
|
if str:sub(i, i) ~= ":" then
|
||||||
|
decode_error(str, i, "expected ':' after key")
|
||||||
|
end
|
||||||
|
i = next_char(str, i + 1, space_chars, true)
|
||||||
|
-- Read value
|
||||||
|
val, i = parse(str, i)
|
||||||
|
-- Set
|
||||||
|
res[key] = val
|
||||||
|
-- Next token
|
||||||
|
i = next_char(str, i, space_chars, true)
|
||||||
|
local chr = str:sub(i, i)
|
||||||
|
i = i + 1
|
||||||
|
if chr == "}" then break end
|
||||||
|
if chr ~= "," then decode_error(str, i, "expected '}' or ','") end
|
||||||
|
end
|
||||||
|
return res, i
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
local char_func_map = {
|
||||||
|
[ '"' ] = parse_string,
|
||||||
|
[ "0" ] = parse_number,
|
||||||
|
[ "1" ] = parse_number,
|
||||||
|
[ "2" ] = parse_number,
|
||||||
|
[ "3" ] = parse_number,
|
||||||
|
[ "4" ] = parse_number,
|
||||||
|
[ "5" ] = parse_number,
|
||||||
|
[ "6" ] = parse_number,
|
||||||
|
[ "7" ] = parse_number,
|
||||||
|
[ "8" ] = parse_number,
|
||||||
|
[ "9" ] = parse_number,
|
||||||
|
[ "-" ] = parse_number,
|
||||||
|
[ "t" ] = parse_literal,
|
||||||
|
[ "f" ] = parse_literal,
|
||||||
|
[ "n" ] = parse_literal,
|
||||||
|
[ "[" ] = parse_array,
|
||||||
|
[ "{" ] = parse_object,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
parse = function(str, idx)
|
||||||
|
local chr = str:sub(idx, idx)
|
||||||
|
local f = char_func_map[chr]
|
||||||
|
if f then
|
||||||
|
return f(str, idx)
|
||||||
|
end
|
||||||
|
decode_error(str, idx, "unexpected character '" .. chr .. "'")
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
function json.decode(str)
|
||||||
|
if type(str) ~= "string" then
|
||||||
|
error("expected argument of type string, got " .. type(str))
|
||||||
|
end
|
||||||
|
local res, idx = parse(str, next_char(str, 1, space_chars, true))
|
||||||
|
idx = next_char(str, idx, space_chars, true)
|
||||||
|
if idx <= #str then
|
||||||
|
decode_error(str, idx, "trailing garbage")
|
||||||
|
end
|
||||||
|
return res
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
return json
|
@ -32,20 +32,6 @@ Configuration example:
|
|||||||
}
|
}
|
||||||
]]
|
]]
|
||||||
|
|
||||||
local function checkreq(name, display)
|
|
||||||
local success, library = pcall(function() return require(name) end)
|
|
||||||
if not success then
|
|
||||||
error("'" .. (display or name) .. "' missing or failed to load.")
|
|
||||||
else
|
|
||||||
return library
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
-- local htmlparser = checkreq("htmlparser") -- so that this can be used for its non-HTML functions without htmlparser installed, this check is delayed
|
|
||||||
-- local json = checkreq('json', 'dkjson') -- I replaced the default, currently working on making sure it's properly included here..
|
|
||||||
local json = require("json") -- TODO replace with utility-function loader
|
|
||||||
-- pandoc and curl are also required
|
|
||||||
|
|
||||||
local success, utility = pcall(function()
|
local success, utility = pcall(function()
|
||||||
return dofile(arg[0]:match("@?(.*/)") or arg[0]:match("@?(.*\\)") .. "utility-functions.lua")
|
return dofile(arg[0]:match("@?(.*/)") or arg[0]:match("@?(.*\\)") .. "utility-functions.lua")
|
||||||
end)
|
end)
|
||||||
@ -54,13 +40,14 @@ if not success then
|
|||||||
error("\n\nThis script may be installed improperly. Follow instructions at:\n\thttps://github.com/TangentFoxy/.lua-files#installation\n")
|
error("\n\nThis script may be installed improperly. Follow instructions at:\n\thttps://github.com/TangentFoxy/.lua-files#installation\n")
|
||||||
end
|
end
|
||||||
|
|
||||||
local path_separator = "\\" -- temporarily hard-forcing Windows because it's being SUCH A PILE OF GARBAGE
|
local json = utility.require("json")
|
||||||
-- local path_separator
|
|
||||||
-- if utility.OS == "Windows" then
|
local path_separator
|
||||||
-- path_separator = "\\"
|
if utility.OS == "Windows" then
|
||||||
-- else
|
path_separator = "\\"
|
||||||
-- path_separator = "/"
|
else
|
||||||
-- end
|
path_separator = "/"
|
||||||
|
end
|
||||||
|
|
||||||
-- also checks for errors
|
-- also checks for errors
|
||||||
-- TODO make it check for required elements and error if any are missing!
|
-- TODO make it check for required elements and error if any are missing!
|
||||||
@ -69,7 +56,7 @@ local function get_config()
|
|||||||
print(help)
|
print(help)
|
||||||
error("\nA config file name/path must be specified.")
|
error("\nA config file name/path must be specified.")
|
||||||
elseif arg[1] == "-h" or arg[1] == "--help" then
|
elseif arg[1] == "-h" or arg[1] == "--help" then
|
||||||
error(help)
|
error(help) -- I strongly dislike using an error to print a help message instead of gracefully exiting..
|
||||||
end
|
end
|
||||||
|
|
||||||
local file, err = io.open(arg[1], "r")
|
local file, err = io.open(arg[1], "r")
|
||||||
@ -109,9 +96,7 @@ local function format_metadata(config)
|
|||||||
end
|
end
|
||||||
|
|
||||||
local function download_pages(config)
|
local function download_pages(config)
|
||||||
-- no longer necessary because utility initializes it for us
|
local htmlparser = utility.require("htmlparser")
|
||||||
-- math.randomseed(os.time()) -- for randomized temporary file name and timings to avoid rate limiting
|
|
||||||
local htmlparser = require "htmlparser" -- TODO replace with local load
|
|
||||||
utility.required_program("curl")
|
utility.required_program("curl")
|
||||||
|
|
||||||
os.execute("mkdir Sections")
|
os.execute("mkdir Sections")
|
||||||
|
@ -1,3 +1,15 @@
|
|||||||
|
-- TO USE, PUT THE INTERIOR OF THIS FUNCTION IN YOUR FILE
|
||||||
|
-- this only works if that file is in the same directory as this one - but works no matter where it was called from
|
||||||
|
local function _example_load()
|
||||||
|
local success, utility = pcall(function()
|
||||||
|
return dofile(arg[0]:match("@?(.*/)") or arg[0]:match("@?(.*\\)") .. "utility-functions.lua")
|
||||||
|
end)
|
||||||
|
if not success then
|
||||||
|
print("\n\n" .. tostring(utility))
|
||||||
|
error("\n\nThis script may be installed improperly. Follow instructions at:\n\thttps://github.com/TangentFoxy/.lua-files#installation\n")
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
math.randomseed(os.time())
|
math.randomseed(os.time())
|
||||||
|
|
||||||
local utility = {}
|
local utility = {}
|
||||||
@ -8,7 +20,7 @@ else
|
|||||||
utility.OS = "UNIX-like"
|
utility.OS = "UNIX-like"
|
||||||
end
|
end
|
||||||
|
|
||||||
utility.path = arg[0]:match("@?(.*/)") or arg[0]:match("@?(.*\\)") -- related to discussion in https://stackoverflow.com/q/6380820
|
utility.path = arg[0]:match("@?(.*/)") or arg[0]:match("@?(.*\\)") -- inspired by discussion in https://stackoverflow.com/q/6380820
|
||||||
|
|
||||||
-- always uses outputting to a temporary file to guarantee safety
|
-- always uses outputting to a temporary file to guarantee safety
|
||||||
function os.capture_safe(command, tmp_file_name)
|
function os.capture_safe(command, tmp_file_name)
|
||||||
@ -39,6 +51,19 @@ function string.trim(s)
|
|||||||
return s:match'^()%s*$' and '' or s:match'^%s*(.*%S)'
|
return s:match'^()%s*$' and '' or s:match'^%s*(.*%S)'
|
||||||
end
|
end
|
||||||
|
|
||||||
|
utility.require = function(name)
|
||||||
|
local success, package_or_err = pcall(function()
|
||||||
|
return dofile(arg[0]:match("@?(.*/)") or arg[0]:match("@?(.*\\)") .. name .. ".lua")
|
||||||
|
end)
|
||||||
|
if success then
|
||||||
|
return package_or_err
|
||||||
|
else
|
||||||
|
print("\n\n" .. tostring(package_or_err))
|
||||||
|
error("\n\nThis script may be installed improperly. Follow instructions at:\n\thttps://github.com/TangentFoxy/.lua-files#installation\n")
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
-- errors if specified program isn't in the path
|
||||||
utility.required_program = function(name)
|
utility.required_program = function(name)
|
||||||
local command
|
local command
|
||||||
if utility.OS == "Windows" then
|
if utility.OS == "Windows" then
|
||||||
@ -74,6 +99,7 @@ utility.escape_quotes = function(input)
|
|||||||
return input
|
return input
|
||||||
end
|
end
|
||||||
|
|
||||||
|
-- Example, print all items in this directory: utility.ls(".")(print)
|
||||||
utility.ls = function(path)
|
utility.ls = function(path)
|
||||||
local command
|
local command
|
||||||
if utility.OS == "Windows" then
|
if utility.OS == "Windows" then
|
||||||
|
Loading…
Reference in New Issue
Block a user