Compare commits

...

36 Commits

Author SHA1 Message Date
15013d02fe organization of command options 2024-11-09 18:05:54 -07:00
72a474e7cf Revert "video-dl.lua uses browser cookies from Firefox now"
This reverts commit 1e09fb54bd.

Fixes #12
2024-11-09 17:59:03 -07:00
c3733589a3 better design / note added about custom_error_message 2024-11-09 00:51:44 -07:00
942a47330e fix error by which utility.open might not always close file handles 2024-11-09 00:48:44 -07:00
33f9a123e6 added ability to extract titles 2024-11-09 00:42:41 -07:00
f593a3c05b fix #32 extract titles from URL 2024-11-09 00:37:27 -07:00
98365ff861 close #30 optimize base_file_name 2024-11-08 18:22:38 -07:00
98a603e647 utility added to make sure file handles can't be left open 2024-11-08 18:16:58 -07:00
599d5f0612 rm unneeded lines 2024-11-08 17:46:25 -07:00
d33dc72f12 improved ReadMe and new chapter detection pattern 2024-11-07 18:38:14 -07:00
9eb20662ce It turns out my inconsistency issues were all from a single missed file handle close 2024-11-07 17:30:59 -07:00
fd4a164e95 fix race condition? remove requirements listed that are installed automatically 2024-11-07 14:10:40 -07:00
ee6fab1039 #31 actually quiet stops PROMPTING, not display 2024-11-06 20:41:57 -07:00
217c39f53f fix #31 2024-11-06 20:40:32 -07:00
063437b596 close #22 run all configs in a directory 2024-11-06 19:56:44 -07:00
3c648fbc85 fix #28 argparse! :D 2024-11-06 19:45:08 -07:00
257986fd5a partial #24 it is default behavior, not optional 2024-11-06 19:04:16 -07:00
b4a5b9ccad fix #21 make it possible to have multiple ebook confs in the same directory 2024-11-06 19:00:21 -07:00
6b82e4e8d6 minor typo fixes 2024-11-06 18:07:43 -07:00
811e08f963 close #18 copyright warnings 2024-11-06 18:03:40 -07:00
e9a155cf04 close #29 different file name option 2024-11-06 17:17:58 -07:00
eeffe27258 more complete examples 2024-11-06 17:11:15 -07:00
d58f161e9e close #17 automatic_naming implemented 2024-11-06 17:05:00 -07:00
62b9574b56 fix #25 convert pages instead of sections to work around pandoc errors 2024-11-06 16:12:15 -07:00
dec4e24bf4 fixed #14 totally 2024-11-05 21:58:18 -07:00
e1ef52ad69 Fix #20 2024-11-05 21:54:00 -07:00
b3d10e9b6f reorganization! 2024-11-05 21:50:26 -07:00
e14313cff0 fixed examples, added documentation 2024-11-05 21:48:33 -07:00
180e979061 fix #16, improved formatting 2024-11-05 16:19:37 -07:00
6fe135e278 fix critical error where the first keyword/tag was duplicated over and over instead of using all of them 2024-11-05 09:57:36 -07:00
633dc71143 epub notes 2024-11-05 09:42:00 -07:00
7ceb73f9a5 supporting manually listed sections, not documented 2024-11-05 02:25:50 -07:00
4760828063 added cleanup functions :D 2024-11-05 01:11:37 -07:00
3ee0adbd48 added dependencies of make-epub and fixed it :D 2024-11-05 00:53:24 -07:00
defbee8e5c minor fixes to make-epub.lua 2024-11-05 00:28:21 -07:00
20dda8d1ee 'initial' make-epub.lua 2024-11-05 00:16:24 -07:00
20 changed files with 1567 additions and 61 deletions

View File

@ -3,7 +3,13 @@
-- The first time this is run (on Windows), a dialog will appear. -- The first time this is run (on Windows), a dialog will appear.
-- Uncheck the "always show this" thing and click Install. -- Uncheck the "always show this" thing and click Install.
local error_occurred, utility = pcall(function() return dofile(arg[0]:match("@?(.*/)") or arg[0]:match("@?(.*\\)") .. "utility-functions.lua") end) if not error_occurred then error("\n\nThis script is installed improperly. Follow instructions at:\n\thttps://github.com/TangentFoxy/.lua-files#installation\n") end local success, utility = pcall(function()
return dofile((arg[0]:match("@?(.*/)") or arg[0]:match("@?(.*\\)")) .. "utility-functions.lua")
end)
if not success then
print("\n\n" .. tostring(utility))
error("\n\nThis script may be installed improperly. Follow instructions at:\n\thttps://github.com/TangentFoxy/.lua-files#installation\n")
end
local for_files = utility.ls() local for_files = utility.ls()
os.execute("mkdir 2pdf-output") os.execute("mkdir 2pdf-output")

View File

@ -16,7 +16,14 @@ if arg[1] and arg[1]:find("help") then
return false return false
end end
local error_occurred, utility = pcall(function() return dofile(arg[0]:match("@?(.*/)") or arg[0]:match("@?(.*\\)") .. "utility-functions.lua") end) if not error_occurred then error("\n\nThis script is installed improperly. Follow instructions at:\n\thttps://github.com/TangentFoxy/.lua-files#installation\n") end local success, utility = pcall(function()
return dofile((arg[0]:match("@?(.*/)") or arg[0]:match("@?(.*\\)")) .. "utility-functions.lua")
end)
if not success then
print("\n\n" .. tostring(utility))
error("\n\nThis script may be installed improperly. Follow instructions at:\n\thttps://github.com/TangentFoxy/.lua-files#installation\n")
end
utility.required_program("ffmpeg") utility.required_program("ffmpeg")
local threads = tonumber(arg[1]) or arg[1] or 1 local threads = tonumber(arg[1]) or arg[1] or 1

View File

@ -19,7 +19,14 @@ if arg[1] and arg[1]:find("help") then
return false return false
end end
local error_occurred, utility = pcall(function() return dofile(arg[0]:match("@?(.*/)") or arg[0]:match("@?(.*\\)") .. "utility-functions.lua") end) if not error_occurred then error("\n\nThis script is installed improperly. Follow instructions at:\n\thttps://github.com/TangentFoxy/.lua-files#installation\n") end local success, utility = pcall(function()
return dofile((arg[0]:match("@?(.*/)") or arg[0]:match("@?(.*\\)")) .. "utility-functions.lua")
end)
if not success then
print("\n\n" .. tostring(utility))
error("\n\nThis script may be installed improperly. Follow instructions at:\n\thttps://github.com/TangentFoxy/.lua-files#installation\n")
end
utility.required_program("ffmpeg") utility.required_program("ffmpeg")
local tune local tune

101
README.md
View File

@ -1,5 +1,10 @@
# .lua-files # .lua-files
It's like dotfiles, but no, it's just Lua scripts I find useful. Personally convenient Lua scripts to add to my path.
## Installation
1. Put this folder somewhere.
2. Add that somewhere to your path. (On Windows, search for Environment Variables (it's "part of" Control Panel) and use the UI to add them to System variables.)
3. (On Windows) Add `.LUA` to PATHEXT.
## Scripts ## Scripts
- `2webm.lua`: Converts everything in the working directory to .webm files. - `2webm.lua`: Converts everything in the working directory to .webm files.
@ -10,7 +15,93 @@ It's like dotfiles, but no, it's just Lua scripts I find useful.
- `utility-functions.lua`: (Library) Required for many of these scripts to run. - `utility-functions.lua`: (Library) Required for many of these scripts to run.
- `video-dl.lua`: A few premade command lines for using `yt-dlp` to download what I want quicker. - `video-dl.lua`: A few premade command lines for using `yt-dlp` to download what I want quicker.
## Installation ### make-epub.lua
1. Put this folder somewhere. This script is only intended for personal use. Do not use it to infringe on copyright.
2. Add that somewhere to your path. (On Windows, search for Environment Variables (it's "part of" Control Panel) and use the UI to add them to System variables.)
3. (On Windows) Add `.LUA` to PATHEXT. ```
Usage:
make-epub.lua <config (JSON file)> [action]
If "." is used instead of a JSON file, every JSON file in the current directory
will be used to make multiple ebooks back-to-back.
[action]: If not specified, all steps will be taken in order (except cleanall).
download: All pages will be downloaded to their own HTML files.
convert: Each page is converted to Markdown.
concat: A file is created for each section out of its pages.
markdown: Metadata frontmatter and Markdown section files will be
concatenated into a single Markdown file.
epub: Markdown file will be converted to an ePub using pandoc.
cleanpage: All page files will be deleted, along with their extra
directories.
cleanall: Deletes everything except the config file and ePub.
Requirements:
- Binaries: pandoc, curl
```
The JSON config spec has two major variations ("Book" and "Anthology").
The following is shared:
- `authors`: (Optional) Array of Strings: Author names. First in the list is used as a byline in the final output. (Legacy: An `author` string works as well. If this exists, it will be first.)
- `title`: (Optional) String: Title of book.
- `base_file_name`: (Optional) String: Alternate final file name. (Default: "`title` by `author`" or just "`title`".)
- `keywords`: Array of Strings: Keywords/Tags. (I'm not sure what the difference is in the final output so it goes in both.)
- `sections`: **See "Book"/"Anthology" variations.** (I call LitErotica's stories sections - because they are often part of a larger whole.)
- `section_titles`: (Optional) Array of Strings: The titles to be used for Table of Contents / headings. (If `sections.naming` is specified, `section_titles` will be ignored.)
- `extract_titles`: (Optional) Boolean: Titles will be extracted from the first page of every section. (Note: This is compatible with `sections.automatic_naming`, but it can create repeated titles.)
- `lazy_titling`: (Optional) Boolean: URLs will be used to generate section titles. (Warning: This process is likely to create janky titles. Note: This is compatible with `sections.automatic_naming`, but it can create repeated titles.)
- `page_counts`: Array of Integers: The number of pages on LitErotica per "story". (I call them sections because this script was made to put together story series originally.)
#### Variation: Book
- `base_url`: String: A partial URL that is the beginning of the URL used for each section (story) on LitErotica. (This script currently only works for stories that end in a padded two-digit number.) (Technically optional if `first_section_url` is specified, and `sections.start` and `sections.finish` are both `1`.)
- `first_section_url`: (Optional) String: Some stories don't have the same URL structure for their first section. This allows you to specify its full URL.
- `sections`: Object defining which sections to download, and what to call them (e.g., Chapters, Parts, ...).
- `start`: (Optional) Number: Where to start. (`1` is the default, since it is the most common.)
- `finish`: Number: Where to end.
- `naming`: (Optional) String: How to name sections in the final output. The result is `[naming] [#]` (using section numbers). (If not specified, sections will not have headings.)
- `automatic_naming`: (Optional) Boolean: If any line matches "Prologue" or "Chapter #" (any number), it will be made into a heading. (Note: This does not override `naming`. Both can be used together.) (Other patterns will be added as I find them.)
Example:
```json
{
"authors": ["Name"],
"title": "Book",
"base_file_name": "Book",
"keywords": ["erotica", "fantasy"],
"base_url": "https://www.literotica.com/s/title-ch-",
"first_section_url": "https://www.literotica.com/s/title",
"sections": {
"start": 1,
"finish": 4,
"naming": "Chapter",
"automatic_naming": true
},
"page_counts": [1, 5, 3, 3]
}
```
#### Variation: Anthology
- `manually_specified_sections`: (Optional) Boolean, must be `true`. Technically not required as the script is capable of figuring out you are using this variation, but *should be* included.
- `sections`: Array of Strings: A complete URL for each story.
- `section_titles`: (**Required**) Array of Strings: The titles to be used for Table of Contents / headings. (Must be in the same order as `sections`.)
Example:
```json
{
"authors": ["Name"],
"title": "Anthology",
"keywords": ["LitErotica", "erotica"],
"manually_specified_sections": true,
"sections": [
"https://www.literotica.com/s/unique-title",
"https://www.literotica.com/s/another-title"
],
"section_titles": [
"Unique Title",
"Another Title"
],
"page_counts": [5, 2]
}
```

272
htmlparser.lua Normal file
View File

@ -0,0 +1,272 @@
-- vim: ft=lua ts=2 sw=2
-- Syntactic Sugar {{{
-- Return (val) If it's Not Empty: yields `val` when its length is greater than
-- zero; otherwise yields a falsy value (nil/false input is passed through as-is,
-- a zero-length value gives false).
local function rine(val)
  local has_items = val and #val > 0
  return has_items and val
end
-- Return (a) If it's Table: yields `a` itself for a table, false for anything else.
local function rit(a)
  if type(a) ~= "table" then
    return false
  end
  return a
end
-- Do-nothing function used wherever an output channel is switched off.
local noop = function() end
-- Prefix every Lua pattern magic character in `s` with "%".
-- NOTE(review): this returns both results of string.gsub (the escaped string and the
-- substitution count); callers that forward the result as a pattern argument should
-- truncate it to one value.
local esc = function(s) return string.gsub(s, "([%^%$%(%)%%%.%[%]%*%+%-%?])", "%%" .. "%1") end
-- Short aliases.
local str = tostring
local char = string.char
local opts = rit(htmlparser_opts) or {} -- needed for silent/noerr/noout/nonl directives, also needed to be defined before `require` in such case
-- prn(l, f, ...): write one "[HTMLParser] [L] message" record, where `l` is the level
-- letter and `f`/... are string.format arguments. Level "i" goes to stdout, every other
-- level to stderr. Replaced by noop when opts.silent is set.
-- NOTE(review): opts.nonl is concatenated as the record terminator, so it has to be a
-- string (e.g. ""); a boolean `true` would raise a concatenation error here — confirm intended usage.
local prn = opts.silent and noop or function(l,f,...)
local fd = (l=="i") and "stdout" or "stderr"
local t = (" [%s] "):format(l:upper())
io[fd]
:write('[HTMLParser]'..t..f:format(...)
..(opts.nonl or "\n")
)
end
-- Error / info writers; each can be silenced separately via opts.noerr / opts.noout.
local err = opts.noerr and noop or function(f,...) prn("e",f,...) end
local out = opts.noout and noop or function(f,...) prn("i",f,...) end
-- line(lvl): current source line of the function at stack level `lvl` (default 2);
-- becomes noop when the debug library is not available.
local line = debug and function(lvl) return debug.getinfo(lvl or 2).currentline end or noop
-- Debug writer, active only with opts.debug; the "#LINE#" marker in the format string
-- is replaced by the line number reported by line(3).
local dbg = opts.debug and function(f,...) prn("d",f:gsub("#LINE#",str(line(3))),...) end or noop
-- }}}
-- Requires {{{
-- MODIFIED --
-- local ElementNode = require"htmlparser.ElementNode"
-- local voidelements = require"htmlparser.voidelements"
-- Load utility-functions.lua from the directory of the running script (arg[0]),
-- then use its `require` helper to load the parser's two submodules.
-- pcall keeps a failed load from surfacing as a raw error: the underlying message
-- is printed first, followed by a pointer to the installation instructions.
local success, utility = pcall(function()
  -- Directory part of arg[0], with either "/" or "\" as separator. When arg[0] has
  -- no directory part (script started from its own directory, e.g. `lua script.lua`)
  -- fall back to "" so the file is looked up relative to the current directory
  -- instead of failing on a nil concatenation.
  local script_dir = arg[0]:match("@?(.*/)") or arg[0]:match("@?(.*\\)") or ""
  return dofile(script_dir .. "utility-functions.lua")
end)
if not success then
  print("\n\n" .. tostring(utility))
  error("\n\nThis script may be installed improperly. Follow instructions at:\n\thttps://github.com/TangentFoxy/.lua-files#installation\n")
end
local ElementNode = utility.require("htmlparser/ElementNode")
local voidelements = utility.require("htmlparser/voidelements")
-- MODIFIED --
--}}}
local HtmlParser = {}
-- Parse an HTML string into a tree of ElementNode objects and return the root node.
--   text:  value to parse; it is converted with tostring() first.
--   limit: optional cap applied to the main loop (number of tags), to the per-tag
--          attribute loop and to the per-tag closing loop; defaults to
--          opts.looplimit and then to 1000. Reaching it logs an error and stops that loop.
-- Options read here: keep_comments, keep_danger_placeholders, tpl_skip_pattern,
-- tpl_marker_pattern, looplimit (plus the globals htmlparser_opts / htmlparser_looplimit).
local function parse(text,limit) -- {{{
-- NOTE(review): rine() tests `#opts > 0`; for a table that holds only string keys the
-- length is 0, so a hash-style module-level `opts` appears to fall through to the
-- next alternative — confirm this is intended.
local opts = rine(opts) -- use top-level opts-table (the one, defined before requiring the module), if exists
or rit(htmlparser_opts) -- or defined after requiring (but before calling `parse`)
or {} -- fallback otherwise
-- The global htmlparser_looplimit is the fallback; the result is stored back into the options table.
opts.looplimit = opts.looplimit or htmlparser_looplimit
local text = str(text)
local limit = limit or opts.looplimit or 1000
-- tpl becomes true once g() (below) has actually substituted a placeholder byte.
local tpl = false
if not opts.keep_comments then -- Strip (or not) comments {{{
text = text:gsub("<!%-%-.-%-%->","") -- Many chances commented code will have syntax errors, that'll lead to parser failures
end -- }}}
-- tpr maps "<" and ">" to two single bytes that do not occur in `text`. They stand in
-- for angle brackets inside attribute values / template placeholders while the tag
-- patterns run, and are mapped back near the end of this function.
local tpr={}
if not opts.keep_danger_placeholders then -- {{{ little speedup by cost of potential parsing breakages
-- search unused "invalid" bytes {{{
-- Candidates are bytes 0..31 and then 127..254: the step taken at i==31 jumps over 32..126.
local busy,i={},0;
repeat -- {{{
local cc = char(i)
if not(text:match(cc)) then -- {{{
if not(tpr["<"]) or not(tpr[">"]) then -- {{{
if not(busy[i]) then -- {{{
if not(tpr["<"]) then -- {{{
tpr["<"] = cc;
elseif not(tpr[">"]) then
tpr[">"] = cc;
end -- }}}
busy[i] = true
-- NOTE(review): `c` is not declared anywhere in this function, so it is read as a
-- global (nil unless something else defines it); these dbg arguments look like leftovers.
dbg("c:{%s}||cc:{%d}||tpr[c]:{%s}",str(c),cc:byte(),str(tpr[c]))
dbg("busy[i]:{%s},i:{%d}",str(busy[i]),i)
dbg("[FindPH]:#LINE# Success! || i=%d",i)
else -- if !busy
dbg("[FindPH]:#LINE# Busy! || i=%d",i)
end -- if !busy -- }}}
-- NOTE(review): same undeclared `c`; here it is passed to "%s" without str().
dbg("c:{%s}||cc:{%d}||tpr[c]:{%s}",c,cc:byte(),str(tpr[c]))
dbg("%s",str(busy[i]))
else -- if < or >
dbg("[FindPH]:#LINE# Done!",i)
break
end -- if < or > -- }}}
else -- text!match(cc)
dbg("[FindPH]:#LINE# Text contains this byte! || i=%d",i)
end -- text!match(cc) -- }}}
local skip=1
if i==31 then
skip=96 -- ASCII
end
i=i+skip
until (i==255) -- }}}
i=nil
--- }}}
if not(tpr["<"]) or not(tpr[">"]) then
err("Impossible to find at least two unused byte codes in this HTML-code. We need it to escape bracket-contained placeholders inside tags.")
err("Consider enabling 'keep_danger_placeholders' option (to silence this error, if parser wasn't failed with current HTML-code) or manually replace few random bytes, to free up the codes.")
else
dbg("[FindPH]:#LINE# Found! || '<'=%d, '>'=%d",tpr["<"]:byte(),tpr[">"]:byte())
end
-- dbg("tpr[>] || tpr[] || #busy%d")
-- g {{{
-- g(id, ...): gsub callback. Receives the captures of a match, replaces "<"/">" inside
-- capture number `id` through the tpr table, sets `tpl` when that changed anything, and
-- returns all captures concatenated as the replacement text.
-- The local `arg` below shadows the global `arg` inside g only.
local function g(id,...)
local arg={...}
local orig=arg[id]
arg[id]=arg[id]:gsub("(.)",tpr)
if arg[id] ~= orig then
tpl=true
dbg("[g]:#LINE# orig: %s", str(orig))
dbg("[g]:#LINE# replaced: %s",str(arg[id]))
end
dbg("[g]:#LINE# called, id: %s, arg[id]: %s, args { "..(("{%s}, "):rep(#arg):gsub(", $","")).." }",id,arg[id],...)
dbg("[g]:#LINE# concat(arg): %s",table.concat(arg))
return table.concat(arg)
end
-- g }}}
-- tpl-placeholders and attributes {{{
-- Four passes: single-quoted attribute values, double-quoted attribute values,
-- the inside of each tag, and finally template placeholders located via the already
-- substituted "<" byte.
text=text
:gsub(
"(=[%s]-)".. -- only match attr.values, and not random strings between two random apostrophs
"(%b'')",
function(...)return g(2,...)end
)
:gsub(
"(=[%s]-)".. -- same for "
'(%b"")',
function(...)return g(2,...)end
) -- Escape "<"/">" inside attr.values (see issue #50)
:gsub(
"(<".. -- Match "<",
(opts.tpl_skip_pattern or "[^!]").. -- with exclusion pattern (for example, to ignore comments, which aren't template placeholders, but can legally contain "<"/">" inside.
")([^>]+)".. -- If matched, we want to escape '<'s if we meet them inside tag
"(>)",
function(...)return g(2,...)end
)
:gsub(
"("..
(tpr["<"] or "__FAILED__").. -- Here we search for "<", we escaped in previous gsub (and don't break things if we have no escaping replacement)
")("..
(opts.tpl_marker_pattern or "[^%w%s]").. -- Capture templating symbol
")([%g%s]-)".. -- match placeholder's content
"(%2)(>)".. -- placeholder's tail
"([^>]*>)", -- remainings
function(...)return g(5,...)end
)
-- }}}
end -- }}}
-- The root node keeps the (possibly placeholder-substituted) text in root._text.
-- node:     node handed to ElementNode:new as the reference for the next tag
-- descend:  whether the next tag is created as a child of `node` (true) or beside it (false)
-- tpos:     end position of the last opening tag found in root._text
-- opentags: per tag name, a stack of opened and not yet closed elements
local index = 0
local root = ElementNode:new(index, str(text))
local node, descend, tpos, opentags = root, true, 1, {}
while true do -- MainLoop {{{
if index == limit then -- {{{
err("Main loop reached loop limit (%d). Consider either increasing it or checking HTML-code for syntax errors", limit)
break
end -- }}}
-- openstart/tpos Definitions {{{
local openstart, name
openstart, tpos, name = root._text:find(
"<" .. -- an uncaptured starting "<"
"([%w-]+)" .. -- name = the first word, directly following the "<"
"[^>]*>", -- include, but not capture everything up to the next ">"
tpos)
dbg("[MainLoop]:#LINE# openstart=%s || tpos=%s || name=%s",str(openstart),str(tpos),str(name))
-- }}}
-- No further opening tag: parsing is finished.
if not name then break end
-- Some more vars {{{
index = index + 1
local tag = ElementNode:new(index, str(name), (node or {}), descend, openstart, tpos)
node = tag
local tagloop
-- tagst is the text of the opening tag itself (the node's close positions still equal
-- its open positions at this point); apos is the scan position inside it.
local tagst, apos = tag:gettext(), 1
-- }}}
while true do -- TagLoop {{{
dbg("[TagLoop]:#LINE# tag.name=%s, tagloop=%s",str(tag.name),str(tagloop))
if tagloop == limit then -- {{{
err("Tag parsing loop reached loop limit (%d). Consider either increasing it or checking HTML-code for syntax errors", limit)
break
end -- }}}
-- Attrs {{{
local start, k, eq, quote, v, zsp
start, apos, k, zsp, eq, zsp, quote = tagst:find(
"%s+" .. -- some uncaptured space
"([^%s=/>]+)" .. -- k = an unspaced string up to an optional "=" or the "/" or ">"
"([%s]-)".. -- zero or more spaces
"(=?)" .. -- eq = the optional; "=", else ""
"([%s]-)".. -- zero or more spaces
[=[(['"]?)]=], -- quote = an optional "'" or '"' following the "=", or ""
apos)
dbg("[TagLoop]:#LINE# start=%s || apos=%s || k=%s || zsp='%s' || eq='%s', quote=[%s]",str(start),str(apos),str(k),str(zsp),str(eq),str(quote))
-- }}}
if not k or k == "/>" or k == ">" then break end
-- Pattern {{{
-- Read the value: unquoted values run up to whitespace or ">", quoted values up to
-- the matching quote character. The search restarts at apos, the end of the match above.
if eq == "=" then
local pattern = "=([^%s>]*)"
if quote ~= "" then
pattern = quote .. "([^" .. quote .. "]*)" .. quote
end
start, apos, v = tagst:find(pattern, apos)
dbg("[TagLoop]:#LINE# start=%s || apos=%s || v=%s || pattern=%s",str(start),str(apos),str(v),str(pattern))
end
-- }}}
-- Attributes without a value are stored with an empty string.
v=v or ""
-- Map placeholder bytes inside the value back to "<" / ">".
if tpl then -- {{{
for rk,rv in pairs(tpr) do
v = v:gsub(rv,rk)
dbg("[TagLoop]:#LINE# rv=%s || rk=%s",str(rv),str(rk))
end
end -- }}}
dbg("[TagLoop]:#LINE# k=%s || v=%s",str(k),str(v))
tag:addattribute(k, v)
tagloop = (tagloop or 0) + 1
end
-- }}}
-- Void elements are closed immediately and the next tag becomes their sibling;
-- every other element is pushed on the open-tag stack of its name.
if voidelements[tag.name:lower()] then -- {{{
descend = false
tag:close()
else
descend = true
opentags[tag.name] = opentags[tag.name] or {}
table.insert(opentags[tag.name], tag)
end
-- }}}
local closeend = tpos
local closingloop
-- Consume the closing tags that follow before the next opening tag: each one pops the
-- most recent open element with that name (or falls back to the current tag) and closes it.
while true do -- TagCloseLoop {{{
-- Can't remember why did I add that, so comment it for now (and not remove), in case it will be needed again
-- (although, it causes #59 and #60, so it will anyway be needed to rework)
-- if voidelements[tag.name:lower()] then break end -- already closed
if closingloop == limit then
err("Tag closing loop reached loop limit (%d). Consider either increasing it or checking HTML-code for syntax errors", limit)
break
end
local closestart, closing, closename
closestart, closeend, closing, closename = root._text:find("[^<]*<(/?)([%w-]+)", closeend)
dbg("[TagCloseLoop]:#LINE# closestart=%s || closeend=%s || closing=%s || closename=%s",str(closestart),str(closeend),str(closing),str(closename))
-- Stop when there is no further tag or the next tag is not a closing one.
if not closing or closing == "" then break end
tag = table.remove(opentags[closename] or {}) or tag -- kludges for the cases of closing void or non-opened tags
-- The match above starts at the text preceding the tag; move closestart to the "<" itself.
closestart = root._text:find("<", closestart)
dbg("[TagCloseLoop]:#LINE# closestart=%s",str(closestart))
tag:close(closestart, closeend + 1)
node = tag.parent
descend = true
closingloop = (closingloop or 0) + 1
end -- }}}
end -- }}}
-- Restore the real "<" / ">" characters in the stored text if placeholders were used.
if tpl then -- {{{
dbg("tpl")
for k,v in pairs(tpr) do
root._text = root._text:gsub(v,k)
end
end -- }}}
return root
end -- }}}
HtmlParser.parse = parse
return HtmlParser

283
htmlparser/ElementNode.lua Normal file
View File

@ -0,0 +1,283 @@
-- vim: ft=lua ts=2
-- Minimal set type: members are stored as table keys whose value is `true`.
-- Supports union (+), difference (-), intersection (*) and tostring().
local Set = {}
Set.mt = {__index = Set}

--- Build a new set from `values`.
-- A plain table with a non-empty array part contributes its array elements;
-- any other table (including another Set) contributes its keys;
-- any other non-nil value becomes the single member; nil gives an empty set.
function Set:new(values)
  local members = {}
  if type(values) == "table" then
    local from_set = getmetatable(values) == Set.mt
    if from_set or not (#values > 0) then
      for key in pairs(values) do
        members[key] = true
      end
    else
      for _, item in ipairs(values) do
        members[item] = true
      end
    end
  elseif values ~= nil then
    members[values] = true
  end
  return setmetatable(members, Set.mt)
end

--- Add `e` (ignored when nil); returns the set for chaining.
function Set:add(e)
  if e == nil then
    return self
  end
  self[e] = true
  return self
end

--- Remove `e` (ignored when nil); returns the set for chaining.
function Set:remove(e)
  if e == nil then
    return self
  end
  self[e] = nil
  return self
end

--- Return the members as an array, in pairs() order.
function Set:tolist()
  local list = {}
  for member in pairs(self) do
    list[#list + 1] = member
  end
  return list
end

-- Union
Set.mt.__add = function (a, b)
  local union = Set:new()
  local left, right = Set:new(a), Set:new(b)
  for member in pairs(left) do
    union[member] = true
  end
  for member in pairs(right) do
    union[member] = true
  end
  return union
end

-- Subtraction
Set.mt.__sub = function (a, b)
  local difference = Set:new()
  local left, right = Set:new(a), Set:new(b)
  for member in pairs(left) do
    difference[member] = true
  end
  for member in pairs(right) do
    difference[member] = nil
  end
  return difference
end

-- Intersection
Set.mt.__mul = function (a, b)
  local common = Set:new()
  local left, right = Set:new(a), Set:new(b)
  for member in pairs(left) do
    -- copies the lookup result from the right-hand set (nil when absent)
    common[member] = right[member]
  end
  return common
end

-- String representation: "{m1, m2, ...}" in pairs() order
Set.mt.__tostring = function (set)
  local names = {}
  for member in pairs(set) do
    names[#names + 1] = tostring(member)
  end
  return "{" .. table.concat(names, ", ") .. "}"
end
local ElementNode = {}
ElementNode.mt = {__index = ElementNode}
--- Create a node.
-- Without `node` the new instance is the document root: it is named "root",
-- stores `nameortext` as the document text and spans that whole text.
-- With `node` and `descend` the instance becomes a child of `node`;
-- with `node` and no `descend` it is placed next to `node` (under node's parent,
-- or under `node` itself when it has no parent).
function ElementNode:new(index, nameortext, node, descend, openstart, openend)
  local instance = {
    index = index,
    name = nameortext,
    level = 0,
    nodes = {},
    attributes = {},
    classes = {},
    _openstart = openstart, _openend = openend,
    _closestart = openstart, _closeend = openend,
    -- per-branch lookup indexes, filled in by ElementNode:close
    deepernodes = Set:new(),
    deeperelements = {}, deeperattributes = {}, deeperids = {}, deeperclasses = {}
  }

  if not node then
    instance.name = "root"
    instance.root = instance
    instance._text = nameortext
    local length = string.len(nameortext)
    instance._openstart, instance._openend = 1, length
    instance._closestart, instance._closeend = 1, length
    return setmetatable(instance, ElementNode.mt)
  end

  instance.root = node.root
  if descend then
    instance.parent = node
    instance.level = node.level + 1
    table.insert(node.nodes, instance)
  else
    local parent = node.parent
    instance.parent = parent or node --XXX: adds some safety but needs more testing for heisenbugs in corner cases
    instance.level = node.level
    table.insert((parent and parent.nodes or node.nodes), instance) --XXX: see above about heisenbugs
  end
  return setmetatable(instance, ElementNode.mt)
end
--- Return the slice of the document text from this node's opening position
-- through its closing end position.
function ElementNode:gettext()
  local document = self.root._text
  local first, last = self._openstart, self._closeend
  return string.sub(document, first, last)
end
--- Replace the text stored on this node's root with `c`.
function ElementNode:settext(c)
  local root = self.root
  root._text = c
end
--- Return this node's text with every "<...>" sequence removed.
function ElementNode:textonly()
  local stripped = self:gettext():gsub("<[^>]*>", "")
  return stripped
end
--- Return the document text strictly between the end of this node's opening
-- span and the start of its closing span.
function ElementNode:getcontent()
  local document = self.root._text
  local first = self._openend + 1
  local last = self._closestart - 1
  return string.sub(document, first, last)
end
--- Store attribute `k` with value `v`.
-- The attribute name is compared case-insensitively: "id" also sets self.id,
-- and "class" appends each whitespace-separated token of `v` to self.classes.
function ElementNode:addattribute(k, v)
  self.attributes[k] = v
  local lowered = string.lower(k)
  if lowered == "id" then
    self.id = v
  elseif lowered == "class" then
    -- class attribute contains "space-separated tokens", each of which we'd like quick access to
    local classes = self.classes
    for token in string.gmatch(v, "%S+") do
      classes[#classes + 1] = token
    end
  end
end
-- Add `node` to the Set stored under `name` in `index`, creating that Set on first use.
local function insert(index, name, node)
  local bucket = index[name]
  if not bucket then
    bucket = Set:new()
    index[name] = bucket
  end
  bucket:add(node)
end
--- Mark this element as closed.
-- When both positions are given they become the node's closing span. In every
-- case the element is then registered in the lookup indexes (deepernodes,
-- deeperelements, deeperattributes, deeperids, deeperclasses) of each ancestor.
function ElementNode:close(closestart, closeend)
  if closestart and closeend then
    self._closestart, self._closeend = closestart, closeend
  end
  -- inform higher level nodes about this element's existence in their branches
  local ancestor = self.parent
  while ancestor do
    ancestor.deepernodes:add(self)
    insert(ancestor.deeperelements, self.name, self)
    for attribute in pairs(self.attributes) do
      insert(ancestor.deeperattributes, attribute, self)
    end
    if self.id then
      insert(ancestor.deeperids, self.id, self)
    end
    for _, class in ipairs(self.classes) do
      insert(ancestor.deeperclasses, class, self)
    end
    ancestor = ancestor.parent
  end
end
-- Escape all ^, $, (, ), %, ., [, ], *, +, - , and ? in `s` with a % prefix so
-- the result matches `s` literally when used as a Lua pattern.
-- Returns exactly one value (the escaped string). string.gsub also returns the
-- substitution count; letting it leak out would make a call such as
-- string.match(text, escape(v)) receive the count as the `init` start position
-- and skip the beginning of `text`.
local function escape(s)
  local escaped = string.gsub(s, "([%^%$%(%)%%%.%[%]%*%+%-%?])", "%%" .. "%1")
  return escaped
end
-- Run a CSS-like selector string `s` against the elements below `self`.
-- Returns a list of matching nodes sorted by their `index`.
-- When `s` is nil, not a string, or empty, an empty Set is returned instead of a list.
-- The selector is split on whitespace; every part narrows the result of the previous
-- one, and a lone ">" part restricts the following part to direct children.
-- Note: this local shadows the built-in `select` for the rest of the file.
local function select(self, s)
if not s or type(s) ~= "string" or s == "" then return Set:new() end
-- Lookup index per selector type: "" = element name, "[" = attribute, "#" = id, "." = class.
local sets = {[""] = self.deeperelements, ["["] = self.deeperattributes,
["#"] = self.deeperids, ["."] = self.deeperclasses}
-- match(t, w): return the Set of nodes registered under `w` in the index for selector
-- type `t`. For attribute selectors `w` is first split into name, operator and value,
-- and the Set is then filtered by comparing each node's attribute value.
local function match(t, w)
local m, e, v
if t == "[" then w, m, e, v = string.match(w,
"([^=|%*~%$!%^]+)" .. -- w = 1 or more characters up to a possible "=", "|", "*", "~", "$", "!", or "^"
"([|%*~%$!%^]?)" .. -- m = an optional "|", "*", "~", "$", "!", or "^", preceding the optional "="
"(=?)" .. -- e = the optional "="
"(.*)" -- v = anything following the "=", or else ""
)
end
local matched = Set:new(sets[t][w])
-- attribute value selectors
if e == "=" then
if #v < 2 then v = "'" .. v .. "'" end -- values should be quoted
v = string.sub(v, 2, #v - 1) -- strip quotes
if m == "!" then matched = Set:new(self.deepernodes) end -- include those without that attribute
for node in pairs(matched) do
local a = node.attributes[w]
-- equals
if m == "" and a ~= v then matched:remove(node)
-- not equals
elseif m == "!" and a == v then matched:remove(node)
-- prefix
elseif m =="|" and string.match(a, "^[^-]*") ~= v then matched:remove(node)
-- contains
-- (relies on escape() yielding a single value: any extra return value would be
-- taken by string.match as its `init` start position)
elseif m =="*" and string.match(a, escape(v)) ~= v then matched:remove(node)
-- word
elseif m =="~" then matched:remove(node)
for word in string.gmatch(a, "%S+") do
if word == v then matched:add(node) break end
end
-- starts with
elseif m =="^" and string.match(a, "^" .. escape(v)) ~= v then matched:remove(node)
-- ends with
elseif m =="$" and string.match(a, escape(v) .. "$") ~= v then matched:remove(node)
end
end -- for node
end -- if v
return matched
end
-- subjects: nodes the current part is evaluated against (initially only `self`)
-- resultset: nodes matched by the current part
-- childrenonly: set by a ">" part, consumed by the part that follows it
local subjects, resultset, childrenonly = Set:new({self})
for part in string.gmatch(s, "%S+") do
-- `repeat ... until true` runs once; `break` inside it acts as "continue with next part".
repeat
if part == ">" then childrenonly = true --[[goto nextpart]] break end
-- Start from every descendant (or only the direct children) of all current subjects.
resultset = Set:new()
for subject in pairs(subjects) do
local star = subject.deepernodes
if childrenonly then star = Set:new(subject.nodes) end
resultset = resultset + star
end
childrenonly = false
if part == "*" then --[[goto nextpart]] break end
-- excludes collects matches found while the ":not" filter is active.
local excludes, filter = Set:new()
local start, pos = 0, 0
-- Walk through the simple selectors chained inside this part (e.g. name, #id, .class, [attr]).
while true do
local switch, stype, name, eq, quote
start, pos, switch, stype, name, eq, quote = string.find(part,
"(%(?%)?)" .. -- switch = a possible ( or ) switching the filter on or off
"([:%[#.]?)" .. -- stype = a possible :, [, #, or .
"([%w-_\\]+)" .. -- name = 1 or more alfanumeric chars (+ hyphen, reverse slash and uderscore)
"([|%*~%$!%^]?=?)" .. -- eq = a possible |=, *=, ~=, $=, !=, ^=, or =
"(['\"]?)", -- quote = a ' or " delimiting a possible attribute value
pos + 1
)
if not name then break end
-- Same run-once construct: `break` means "continue with the next simple selector".
repeat
-- A ":" prefix names a filter (only "not" is acted upon below) and matches nothing itself.
if ":" == stype then
filter = name
--[[goto nextname]] break
end
if ")" == switch then
filter = nil
end
-- Quoted attribute value: re-read it as a balanced quote pair followed by "]" and
-- append operator and value to the name, which match() splits apart again.
if "[" == stype and "" ~= quote then
local value
start, pos, value = string.find(part, "(%b" .. quote .. quote .. ")]", pos)
name = name .. eq .. value
end
local matched = match(stype, name)
if filter == "not" then
excludes = excludes + matched
else
resultset = resultset * matched
end
--::nextname::
break
until true
end
resultset = resultset - excludes
subjects = Set:new(resultset)
--::nextpart::
break
until true
end
-- Convert the final Set into a list ordered by node index.
resultset = resultset:tolist()
table.sort(resultset, function (a, b) return a.index < b.index end)
return resultset
end
--- Method form of the selector query: forwards to the file-local `select`
-- with this node as the starting point.
function ElementNode:select(s)
  return select(self, s)
end
ElementNode.mt.__call = select
return ElementNode

View File

@ -0,0 +1,19 @@
-- vim: ft=lua ts=2
-- Lookup table of element names treated as void elements.
-- Keys are lower-case tag names and every value is `true`, so membership is a
-- plain index test (the parser indexes this table with the lower-cased tag name
-- and closes such elements immediately).
return {
area = true,
base = true,
br = true,
col = true,
command = true,
embed = true,
hr = true,
img = true,
input = true,
keygen = true,
link = true,
meta = true,
param = true,
source = true,
track = true,
wbr = true
}

388
json.lua Normal file
View File

@ -0,0 +1,388 @@
--
-- json.lua
--
-- Copyright (c) 2020 rxi
--
-- Permission is hereby granted, free of charge, to any person obtaining a copy of
-- this software and associated documentation files (the "Software"), to deal in
-- the Software without restriction, including without limitation the rights to
-- use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
-- of the Software, and to permit persons to whom the Software is furnished to do
-- so, subject to the following conditions:
--
-- The above copyright notice and this permission notice shall be included in all
-- copies or substantial portions of the Software.
--
-- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-- SOFTWARE.
--
local json = { _version = "0.1.2" }
-------------------------------------------------------------------------------
-- Encode
-------------------------------------------------------------------------------
local encode
-- Raw character -> letter that follows the backslash in its JSON escape.
local escape_char_map = {
  [ "\\" ] = "\\",
  [ "\"" ] = "\"",
  [ "\b" ] = "b",
  [ "\f" ] = "f",
  [ "\n" ] = "n",
  [ "\r" ] = "r",
  [ "\t" ] = "t",
}

-- Reverse direction (escape letter -> raw character), plus "/" which maps to itself.
local escape_char_map_inv = { [ "/" ] = "/" }
for raw, letter in pairs(escape_char_map) do
  escape_char_map_inv[letter] = raw
end

--- Return the JSON escape sequence for the single character `c`: the short
-- form when the map has one, otherwise "\u" followed by the 4-digit hex byte value.
local function escape_char(c)
  local letter = escape_char_map[c]
  if letter then
    return "\\" .. letter
  end
  return "\\" .. string.format("u%04x", c:byte())
end
--- Encoder for nil: the argument is ignored and the JSON literal is returned.
local function encode_nil(val)
  local literal = "null"
  return literal
end
--- Encode a Lua table as a JSON array or object.
-- A table is treated as an array when it has an element at index 1 or is empty;
-- otherwise it is treated as an object.
-- `stack` records the tables currently being encoded so cycles can be detected.
-- Raises on circular references, on non-numeric keys in an array, on sparse
-- arrays, and on non-string keys in an object.
local function encode_table(val, stack)
  stack = stack or {}

  -- Circular reference?
  if stack[val] then error("circular reference") end
  stack[val] = true

  local parts = {}
  local is_array = rawget(val, 1) ~= nil or next(val) == nil

  if not is_array then
    -- Treat as an object
    for key, item in pairs(val) do
      if type(key) ~= "string" then
        error("invalid table: mixed or invalid key types")
      end
      parts[#parts + 1] = encode(key, stack) .. ":" .. encode(item, stack)
    end
    stack[val] = nil
    return "{" .. table.concat(parts, ",") .. "}"
  end

  -- Treat as array -- check keys are valid and it is not sparse
  local key_count = 0
  for key in pairs(val) do
    if type(key) ~= "number" then
      error("invalid table: mixed or invalid key types")
    end
    key_count = key_count + 1
  end
  if key_count ~= #val then
    error("invalid table: sparse array")
  end

  -- Encode
  for _, item in ipairs(val) do
    parts[#parts + 1] = encode(item, stack)
  end
  stack[val] = nil
  return "[" .. table.concat(parts, ",") .. "]"
end
--- Encode a string: control characters (bytes 0-31), backslash and double quote
-- are replaced through escape_char, and the result is wrapped in double quotes.
local function encode_string(val)
  local escaped = val:gsub('[%z\1-\31\\"]', escape_char)
  return '"' .. escaped .. '"'
end
--- Encode a number using the "%.14g" format.
-- Raises for NaN and for positive or negative infinity.
local function encode_number(val)
  local is_nan = val ~= val
  local is_infinite = val <= -math.huge or val >= math.huge
  if is_nan or is_infinite then
    error("unexpected number value '" .. tostring(val) .. "'")
  end
  return ("%.14g"):format(val)
end
-- Dispatch table: Lua type name -> encoder for values of that type.
local type_func_map = {
  [ "nil" ] = encode_nil,
  [ "table" ] = encode_table,
  [ "string" ] = encode_string,
  [ "number" ] = encode_number,
  [ "boolean" ] = tostring,
}

--- Encode any supported value by dispatching on its type.
-- `stack` is handed through to the encoder. Raises for types without an encoder.
encode = function(val, stack)
  local kind = type(val)
  local encoder = type_func_map[kind]
  if not encoder then
    error("unexpected type '" .. kind .. "'")
  end
  return encoder(val, stack)
end
-- Public entry point: returns the JSON text for `val`.
-- The extra parentheses truncate the result to exactly one return value.
function json.encode(val)
return ( encode(val) )
end
-------------------------------------------------------------------------------
-- Decode
-------------------------------------------------------------------------------
local parse
-- Builds a set from its arguments: a table in which every argument is a key
-- mapped to true.
local function create_set(...)
  local members = {}
  local count = select("#", ...)
  for position = 1, count do
    local member = select(position, ...)
    members[member] = true
  end
  return members
end
-- Whitespace characters skipped between tokens.
local space_chars = create_set(" ", "\t", "\r", "\n")
-- Characters that terminate a number or literal token.
local delim_chars = create_set(" ", "\t", "\r", "\n", "]", "}", ",")
-- Characters allowed directly after a backslash inside a string.
local escape_chars = create_set("\\", "/", '"', "b", "f", "n", "r", "t", "u")
-- Recognized bare-word literals, and the Lua value each one decodes to.
local literals = create_set("true", "false", "null")
local literal_map = {
[ "true" ] = true,
[ "false" ] = false,
[ "null" ] = nil,
}
-- Scans `str` from position `idx` and returns the index of the first
-- character whose membership in `set` differs from `negate`:
--   negate = nil  -> first character that IS in the set
--   negate = true -> first character that is NOT in the set
-- Returns #str + 1 when no such character exists.
local function next_char(str, idx, set, negate)
  local length = #str
  local position = idx
  while position <= length do
    if set[str:sub(position, position)] ~= negate then
      return position
    end
    position = position + 1
  end
  return length + 1
end
-- Raises a decode error whose message is `msg` followed by the 1-based line
-- and column of position `idx` in `str`. Never returns.
local function decode_error(str, idx, msg)
  local line, column = 1, 1
  for position = 1, idx - 1 do
    if str:sub(position, position) == "\n" then
      line = line + 1
      column = 1
    else
      column = column + 1
    end
  end
  error( string.format("%s at line %d col %d", msg, line, column) )
end
-- Converts the Unicode codepoint `n` to its UTF-8 byte sequence (1-4 bytes).
-- Raises an error for codepoints above 0x10ffff.
-- http://scripts.sil.org/cms/scripts/page.php?site_id=nrsi&id=iws-appendixa
local function codepoint_to_utf8(n)
  local floor = math.floor
  if n <= 0x7f then
    return string.char(n)
  end
  if n <= 0x7ff then
    return string.char(floor(n / 64) + 192, n % 64 + 128)
  end
  if n <= 0xffff then
    return string.char(floor(n / 4096) + 224, floor(n % 4096 / 64) + 128, n % 64 + 128)
  end
  if n <= 0x10ffff then
    return string.char(floor(n / 262144) + 240, floor(n % 262144 / 4096) + 128,
      floor(n % 4096 / 64) + 128, n % 64 + 128)
  end
  error( string.format("invalid unicode codepoint '%x'", n) )
end
-- Converts the hex text of a unicode escape to UTF-8.
-- `s` is either "XXXX" (a single escape) or "XXXX\uYYYY" (a surrogate pair,
-- with the second group of hex digits at positions 7-10).
local function parse_unicode_escape(s)
  local high = tonumber( s:sub(1, 4), 16 )
  local low = tonumber( s:sub(7, 10), 16 )
  if not low then
    return codepoint_to_utf8(high)
  end
  -- Surrogate pair: combine both halves into one codepoint
  return codepoint_to_utf8((high - 0xd800) * 0x400 + (low - 0xdc00) + 0x10000)
end
-- Parses a JSON string whose opening quote is at index `i` of `str`.
-- Returns the decoded Lua string and the index just past the closing quote.
-- Raises (via decode_error) on raw control characters, invalid escapes, and a
-- missing closing quote.
local function parse_string(str, i)
local res = ""
-- j: current scan position; k: start of the pending run of plain characters
-- that has not been appended to res yet
local j = i + 1
local k = j
while j <= #str do
local x = str:byte(j)
if x < 32 then
decode_error(str, j, "control character in string")
elseif x == 92 then -- `\`: Escape
-- flush the plain characters seen before the backslash
res = res .. str:sub(k, j - 1)
j = j + 1
local c = str:sub(j, j)
if c == "u" then
-- try a surrogate pair first, then a single 4-digit escape
local hex = str:match("^[dD][89aAbB]%x%x\\u%x%x%x%x", j + 1)
or str:match("^%x%x%x%x", j + 1)
or decode_error(str, j - 1, "invalid unicode escape in string")
res = res .. parse_unicode_escape(hex)
-- skip the hex text; the `j = j + 1` at the loop end steps past it
j = j + #hex
else
if not escape_chars[c] then
decode_error(str, j - 1, "invalid escape char '" .. c .. "' in string")
end
res = res .. escape_char_map_inv[c]
end
k = j + 1
elseif x == 34 then -- `"`: End of string
res = res .. str:sub(k, j - 1)
return res, j + 1
end
j = j + 1
end
decode_error(str, i, "expected closing quote for string")
end
-- Parses a number token starting at index `i`. The token runs up to the next
-- delimiter character (or the end of the input).
-- Returns the number and the index of the delimiter; raises via decode_error
-- when the token is not a valid number.
local function parse_number(str, i)
  local token_end = next_char(str, i, delim_chars)
  local token = str:sub(i, token_end - 1)
  local value = tonumber(token)
  if value == nil then
    decode_error(str, i, "invalid number '" .. token .. "'")
  end
  return value, token_end
end
-- Parses one of the bare-word literals (true, false, null) starting at `i`.
-- Returns the corresponding Lua value (nil for null) and the index of the
-- delimiter that ended the word; raises via decode_error for any other word.
local function parse_literal(str, i)
  local word_end = next_char(str, i, delim_chars)
  local word = str:sub(i, word_end - 1)
  if literals[word] == nil then
    decode_error(str, i, "invalid literal '" .. word .. "'")
  end
  return literal_map[word], word_end
end
-- Parses a JSON array whose '[' is at index `i`.
-- Returns a Lua table holding the elements at indices 1..n (a null element
-- leaves its slot empty) and the index just past the closing ']'.
-- Raises via decode_error when an element is not followed by ',' or ']'.
local function parse_array(str, i)
  local items = {}
  local count = 0
  i = i + 1 -- step past '['
  while true do
    i = next_char(str, i, space_chars, true)
    -- Empty / end of array?
    if str:sub(i, i) == "]" then
      return items, i + 1
    end
    -- Read element
    local value
    value, i = parse(str, i)
    count = count + 1
    items[count] = value
    -- Read separator
    i = next_char(str, i, space_chars, true)
    local separator = str:sub(i, i)
    i = i + 1
    if separator == "]" then
      return items, i
    end
    if separator ~= "," then
      decode_error(str, i, "expected ']' or ','")
    end
  end
end
-- Parses a JSON object whose '{' is at index `i`.
-- Returns a Lua table keyed by the member names and the index just past the
-- closing '}'. Raises via decode_error when a key is not a string, the ':' is
-- missing, or a member is not followed by ',' or '}'.
local function parse_object(str, i)
  local members = {}
  i = i + 1 -- step past '{'
  while true do
    i = next_char(str, i, space_chars, true)
    -- Empty / end of object?
    if str:sub(i, i) == "}" then
      return members, i + 1
    end
    -- Read key
    if str:sub(i, i) ~= '"' then
      decode_error(str, i, "expected string for key")
    end
    local name
    name, i = parse(str, i)
    -- Read ':' delimiter
    i = next_char(str, i, space_chars, true)
    if str:sub(i, i) ~= ":" then
      decode_error(str, i, "expected ':' after key")
    end
    i = next_char(str, i + 1, space_chars, true)
    -- Read value and store it
    local value
    value, i = parse(str, i)
    members[name] = value
    -- Read separator
    i = next_char(str, i, space_chars, true)
    local separator = str:sub(i, i)
    i = i + 1
    if separator == "}" then
      return members, i
    end
    if separator ~= "," then
      decode_error(str, i, "expected '}' or ','")
    end
  end
end
-- Dispatch table used by `parse`: first character of a JSON value -> the
-- parser for that kind of value. Any other first character is an error.
local char_func_map = {
[ '"' ] = parse_string,
[ "0" ] = parse_number,
[ "1" ] = parse_number,
[ "2" ] = parse_number,
[ "3" ] = parse_number,
[ "4" ] = parse_number,
[ "5" ] = parse_number,
[ "6" ] = parse_number,
[ "7" ] = parse_number,
[ "8" ] = parse_number,
[ "9" ] = parse_number,
[ "-" ] = parse_number,
[ "t" ] = parse_literal,
[ "f" ] = parse_literal,
[ "n" ] = parse_literal,
[ "[" ] = parse_array,
[ "{" ] = parse_object,
}
-- Parses the JSON value that starts at index `idx` of `str` by dispatching on
-- its first character. Returns the decoded value and the index just past it;
-- raises via decode_error when no parser handles that character.
parse = function(str, idx)
  local first = str:sub(idx, idx)
  local handler = char_func_map[first]
  if not handler then
    decode_error(str, idx, "unexpected character '" .. first .. "'")
  end
  return handler(str, idx)
end
-- Public entry point: decodes the JSON text `str` and returns the Lua value.
-- Leading and trailing whitespace is allowed; anything else after the value
-- raises a "trailing garbage" error. Raises when `str` is not a string.
function json.decode(str)
  local kind = type(str)
  if kind ~= "string" then
    error("expected argument of type string, got " .. kind)
  end
  local first = next_char(str, 1, space_chars, true)
  local res, idx = parse(str, first)
  local rest = next_char(str, idx, space_chars, true)
  if rest <= #str then
    decode_error(str, rest, "trailing garbage")
  end
  return res
end
return json

370
make-epub.lua Executable file
View File

@ -0,0 +1,370 @@
#!/usr/bin/env luajit
local help = [[Usage:
make-epub.lua <config (JSON file)> [action]
If "." is used instead of a JSON file, every JSON file in the current directory
will be used to make multiple ebooks back-to-back.
[action]: If not specified, all steps will be taken in order (except cleanall).
download: All pages will be downloaded to their own HTML files.
convert: Each page is converted to Markdown.
concat: A file is created for each section out of its pages.
markdown: Metadata frontmatter and Markdown section files will be
concatenated into a single Markdown file.
epub: Markdown file will be converted to an ePub using pandoc.
cleanpage: All page files will be deleted, along with their extra
directories.
cleanall: Deletes everything except the config file and ePub.
Requirements:
- Binaries: pandoc, curl
For how to write a configuration and examples, see the .lua-files README:
https://github.com/TangentFoxy/.lua-files#make-epublua
]]
-- Load utility-functions.lua from the directory this script lives in, derived
-- from arg[0]. The load runs under pcall so a failure can be reported with
-- installation instructions rather than a bare error.
-- NOTE(review): when arg[0] contains no "/" or "\" (script started by bare
-- file name from its own directory) both matches are nil and the
-- concatenation fails, which also ends up in the error branch below.
local success, utility = pcall(function()
return dofile((arg[0]:match("@?(.*/)") or arg[0]:match("@?(.*\\)")) .. "utility-functions.lua")
end)
if not success then
print("\n\n" .. tostring(utility))
error("\n\nThis script may be installed improperly. Follow instructions at:\n\thttps://github.com/TangentFoxy/.lua-files#installation\n")
end
local path_separator = utility.path_separator
-- Written at the top and again at the end of every generated Markdown file.
local copyright_warning = "This ebook was created by an automated tool for personal use. It cannot be distributed or sold without permission of copyright holder(s). (If you did not make this ebook, you may be infringing.)\n\n"
-- Decodes the JSON text of a config file and fills in defaults and derived
-- values. Also checks for errors.
-- TODO make it check for ALL required elements and error if any are missing!
--   config_file_text: raw JSON text of the config file
-- Returns the decoded config table with these fields added or normalized:
--   config_file_text, authors, keywords, base_url, sections.start,
--   base_file_name, and (for manually listed sections) sections.finish and
--   manually_specified_sections.
-- Raises an error when sections, sections.finish or page_counts is missing,
-- or when page_counts / section_titles do not match the number of sections.
local function load_config(config_file_text)
  local json = utility.require("json")

  -- must be local: a bare assignment here would create a global named `config`
  local config = json.decode(config_file_text)
  if type(config) ~= "table" then
    error("Config file must contain a JSON object.")
  end
  config.config_file_text = config_file_text

  if not config.authors then
    config.authors = {}
  end
  if not config.keywords then
    config.keywords = {}
  end
  if config.author then -- old style single author will be prepended to authors list
    table.insert(config.authors, 1, config.author)
  end

  -- if only using a single section
  if config.first_section_url and not config.base_url then
    config.base_url = config.first_section_url -- prevent errors due to required item being missing
  end

  if type(config.sections) ~= "table" then
    error("Config is missing required item: sections")
  end

  -- detecting manually specified sections and flagging it to the rest of the script
  if config.sections[1] then
    config.sections.start = 1
    config.sections.finish = #config.sections
    config.manually_specified_sections = true -- decided to make this part of the config spec, but it's set here again just in case
    config.base_url = "http://example.com/" -- must be defined to prevent errors; it will be manipulated and ignored in this use case
  end

  if not config.sections.start then
    config.sections.start = 1 -- the first one can be optional since the common use case is ALL OF THEM
  end
  if not config.sections.finish then
    error("Config is missing required item: sections.finish")
  end
  if type(config.page_counts) ~= "table" then
    error("Config is missing required item: page_counts")
  end

  local section_count = config.sections.finish - config.sections.start + 1
  if #config.page_counts ~= section_count then
    error("Number of page_counts does not match number of sections.")
  end
  if config.section_titles and #config.section_titles ~= section_count then
    error("Number of section_titles does not match number of sections.")
  end

  local base_file_name
  if config.title and config.authors[1] then
    -- first author in list gets top billing (this is problematic in anthologies unless an editor is the first entry)
    base_file_name = config.title .. " by " .. config.authors[1]
  elseif config.title then
    base_file_name = config.title
  else
    base_file_name = "Book"
  end
  -- an explicit base_file_name in the config wins over the generated one
  config.base_file_name = utility.make_safe_file_name(config.base_file_name or base_file_name)

  return config
end
-- Builds the metadata front matter written at the top of the Markdown file.
--   config: table as returned by load_config; reads title, base_file_name,
--           authors and keywords
-- Returns the front matter as a string: a "---" delimited block with title,
-- author, keywords and tags lines, followed by blank lines.
local function format_metadata(config)
  -- Quotes every item and joins them with ", ". An empty list gives "", so
  -- configs without authors or keywords produce "[]" instead of crashing.
  local function stringify_list(list)
    local quoted = {}
    for index = 1, #list do
      quoted[index] = utility.escape_quotes(list[index]):enquote()
    end
    return table.concat(quoted, ", ")
  end

  -- load_config accepts configs without a title (it names them "Book"), so
  -- fall back to the file name here instead of failing on a nil title
  local title = config.title or config.base_file_name
  local keywords_string = stringify_list(config.keywords)
  local metadata = {
    "---",
    "title: " .. utility.escape_quotes(title):enquote(),
    "author: [" .. stringify_list(config.authors) .. "]",
    "keywords: [" .. keywords_string .. "]",
    "tags: [" .. keywords_string .. "]",
    "---",
    "",
  }
  return table.concat(metadata, "\n") .. "\n"
end
-- Downloads every page of every section with curl, extracts the article
-- content from the HTML, and writes it to
-- <base_file_name>/<section>/<page>.html.
--   config: table as returned by load_config
-- Raises an error when a page cannot be downloaded or when the expected
-- elements are not found in the downloaded HTML. The temporary download file
-- is removed in either case.
local function download_pages(config)
  local htmlparser = utility.require("htmlparser")
  utility.required_program("curl")
  local working_dir = config.base_file_name

  os.execute("mkdir " .. working_dir:enquote())
  for section = config.sections.start, config.sections.finish do
    local section_dir = working_dir .. path_separator .. tostring(section) .. path_separator
    os.execute("mkdir " .. section_dir:sub(1, -2):enquote()) -- sub() drops the trailing separator

    local section_url
    if config.manually_specified_sections then
      section_url = config.sections[section]
    elseif section == 1 and config.first_section_url then
      section_url = config.first_section_url
    else
      section_url = config.base_url .. string.format("%02i", section) -- leftpad 2 (This will eventually cause problems.)
    end

    for page = 1, config.page_counts[section - (config.sections.start - 1)] do
      local download_url = section_url
      if page > 1 then
        download_url = section_url .. "?page=" .. tostring(page)
      end

      local temporary_html_file_name = utility.tmp_file_name()
      os.execute("curl " .. download_url:enquote() .. " > " .. temporary_html_file_name)

      -- pcall so the temporary file is removed even when extraction fails
      local success, err = pcall(function()
        utility.open(temporary_html_file_name, "r", "Could not download " .. download_url:enquote())(function(html_file)
          local raw_html = html_file:read("*all")
          local parser = htmlparser.parse(raw_html)

          local content_tag = parser:select(".article > div > div") -- TODO add ability to set selector in config!
          if not content_tag[1] then
            error("Could not find page content in " .. download_url:enquote(), 0)
          end
          local text = content_tag[1]:getcontent()

          if page == 1 and config.extract_titles then
            local headline = parser:select(".headline")[1]
            if not headline then
              error("Could not find a title in " .. download_url:enquote(), 0)
            end
            text = headline:gettext() .. text
          end

          utility.open(section_dir .. page .. ".html", "w")(function(page_file)
            page_file:write(text .. "\n")
          end)
        end)
      end)

      -- os.remove instead of shelling out to `rm`, which is not available in
      -- every shell this script runs under
      os.remove(temporary_html_file_name)
      if not success then
        error(err, 0)
      end

      os.execute("sleep " .. tostring(math.random(5))) -- avoid rate limiting
    end
  end
end
-- Converts every downloaded page from HTML to Markdown with pandoc, writing
-- <page>.md next to each <page>.html.
--   config: table as returned by load_config
local function convert_pages(config)
  utility.required_program("pandoc")
  local first_section = config.sections.start
  for section = first_section, config.sections.finish do
    local section_dir = config.base_file_name .. path_separator .. tostring(section) .. path_separator
    local page_count = config.page_counts[section - (first_section - 1)]
    for page = 1, page_count do
      local html_name = section_dir .. page .. ".html"
      local markdown_name = section_dir .. page .. ".md"
      os.execute("pandoc --from html --to markdown " .. html_name:enquote() .. " -o " .. markdown_name:enquote())
    end
  end
end
-- Joins the Markdown pages of each section into <base_file_name>/<section>.md.
-- With config.sections.automatic_naming set, lines that look like chapter
-- headings are turned into Markdown headings on the way through.
--   config: table as returned by load_config
local function concatenate_pages(config)
  -- lines matching any of these are prefixed with "# "
  local naming_patterns = {
    "^Prologue$",
    "^Chapter %d+$",
    "^%*%*CHAPTER ",
  }
  local working_dir = config.base_file_name
  local first_section = config.sections.start

  for section = first_section, config.sections.finish do
    local section_dir = working_dir .. path_separator .. tostring(section) .. path_separator
    local page_count = config.page_counts[section - (first_section - 1)]

    utility.open(working_dir .. path_separator .. tostring(section) .. ".md", "w")(function(section_file)
      for page = 1, page_count do
        utility.open(section_dir .. page .. ".md", "r")(function(page_file)
          if not config.sections.automatic_naming then
            section_file:write(page_file:read("*all"))
          else
            for source_line in page_file:lines() do
              local line = source_line
              for _, pattern in ipairs(naming_patterns) do
                if line:find(pattern) then
                  line = "# " .. line
                end
              end
              section_file:write(line .. "\n")
            end
          end
          section_file:write("\n") -- guarantees no accidental line collisions
        end)
      end
    end)
  end
end
-- Writes <base_file_name>.md: metadata front matter, the copyright warning,
-- every section file (each optionally preceded by a heading), and a closing
-- chapter containing the config that produced the book.
--   config: table as returned by load_config
-- The section heading comes from the first of these that is set:
--   sections.naming  -> "<naming> <section number>"
--   section_titles   -> the title listed for that section
--   lazy_titling     -> a title derived from the section URL
local function write_markdown_file(config)
  local working_dir = config.base_file_name
  local first_section = config.sections.start

  -- Derives a title from the dash-separated words of a section's URL.
  local function lazy_section_title(section)
    local section_url
    if config.manually_specified_sections then
      section_url = config.sections[section]
    elseif section == 1 and config.first_section_url then
      section_url = config.first_section_url
    else
      section_url = config.base_url
    end

    -- split() returns a table; gsplit() returns an iterator that cannot be indexed
    -- NOTE(review): sub(30) presumably skips a fixed-length site prefix - confirm
    local title_parts = section_url:sub(30):split("-")
    -- drop trailing numeric parts, then a trailing "ch" / "pt" marker
    while tonumber(title_parts[#title_parts]) do
      title_parts[#title_parts] = nil
    end
    local last_part = title_parts[#title_parts]
    if last_part == "ch" or last_part == "pt" then
      title_parts[#title_parts] = nil
    end
    for index, part in ipairs(title_parts) do
      title_parts[index] = part:sub(1, 1):upper() .. part:sub(2)
    end
    return table.concat(title_parts, " ")
  end

  utility.open(config.base_file_name .. ".md", "w")(function(markdown_file)
    markdown_file:write(format_metadata(config))
    markdown_file:write(copyright_warning)

    for section = first_section, config.sections.finish do
      if config.sections.naming then
        markdown_file:write("\n\n# " .. config.sections.naming .. " " .. tostring(section))
      elseif config.section_titles then
        -- section_titles is indexed from 1 like page_counts, not by section number
        markdown_file:write("\n\n# " .. config.section_titles[section - (first_section - 1)])
      elseif config.lazy_titling then
        markdown_file:write("\n\n# " .. lazy_section_title(section))
      end
      markdown_file:write("\n\n")

      local section_file_name = working_dir .. path_separator .. tostring(section)
      utility.open(section_file_name .. ".md", "r")(function(section_file)
        markdown_file:write(section_file:read("*all"))
      end)
    end

    markdown_file:write("\n\n# Ebook Creation Metadata\n\n")
    markdown_file:write(copyright_warning)
    markdown_file:write("This ebook was created using the following config:\n\n")
    markdown_file:write("```json\n" .. config.config_file_text .. "\n```\n")
  end)
end
-- Converts <base_file_name>.md into "All ePubs/<base_file_name>.epub" with
-- pandoc, creating the output directory first.
--   config: table as returned by load_config
local function make_epub(config)
  utility.required_program("pandoc")
  local output_dir = "All ePubs"
  local source_name = config.base_file_name .. ".md"
  local target_name = output_dir .. path_separator .. config.base_file_name .. ".epub"
  os.execute("mkdir " .. output_dir:enquote())
  os.execute("pandoc --from markdown --to epub " .. source_name:enquote() .. " -o " .. target_name:enquote() .. " --toc=true")
end
-- Deletes the per-section page directories (<base_file_name>/<section>) and
-- everything inside them.
--   config: table as returned by load_config
local function rm_page_files(config)
  local remove_command = utility.recursive_remove_command
  for section = config.sections.start, config.sections.finish do
    local section_dir = config.base_file_name .. path_separator .. tostring(section)
    os.execute(remove_command .. section_dir:enquote())
  end
end
-- Deletes the working directory and the combined Markdown file, leaving only
-- the config file and the ePub.
--   config: table as returned by load_config
local function rm_all(config)
  local working_dir = config.base_file_name
  os.execute(utility.recursive_remove_command .. working_dir:enquote())
  -- os.remove instead of shelling out to `rm`, which is not available in
  -- every shell this script runs under
  os.remove(config.base_file_name .. ".md")
end
-- Maps positional command-line arguments to names.
--   arguments:            list of argument strings (e.g. the global `arg`)
--   positional_arguments: list of names; the name at index i receives the
--                         argument at index i
-- Returns a table of name -> argument; arguments beyond the named positions
-- are ignored. When any argument is a help flag, the usage text is printed
-- and nil is returned instead.
local function argparse(arguments, positional_arguments)
  -- kept under a name that does not shadow the `help` usage string
  local help_flags = {
    ["-h"] = true,
    ["--help"] = true,
    ["/?"] = true,
    ["/help"] = true,
    ["help"] = true,
  }
  local recognized_arguments = {}
  for index, argument in ipairs(arguments) do
    if help_flags[argument] then
      print(help)
      return nil
    end
    if positional_arguments[index] then
      recognized_arguments[positional_arguments[index]] = argument
    end
  end
  return recognized_arguments
end
-- Builds one ebook from one config file.
--   arguments.json_file_name: path of the JSON config file
--   arguments.action:         optional single step to run; when absent, every
--                             step except cleanall runs in order
-- Prints the usage text and raises an error for an unknown action.
local function main(arguments)
  local config = utility.open(arguments.json_file_name, "r")(function(config_file)
    return load_config(config_file:read("*all"))
  end)

  if arguments.action then
    local actions = {
      download = download_pages,
      convert = convert_pages,
      concat = concatenate_pages,
      markdown = write_markdown_file,
      epub = make_epub,
      cleanpage = rm_page_files,
      cleanall = rm_all,
    }
    local action = actions[arguments.action]
    if not action then
      print(help)
      error("\nInvalid action specified.")
    end
    action(config)
    return
  end

  -- default: the whole pipeline, announcing each step before it runs
  local pipeline = {
    { "\nDownloading pages...\n", download_pages },
    { "\nConverting pages...\n", convert_pages },
    { "\nConcatenating pages...\n", concatenate_pages },
    { "\nRemoving page files...\n", rm_page_files },
    { "\nWriting Markdown file...\n", write_markdown_file },
    { "\nMaking ePub...\n", make_epub },
  }
  for _, step in ipairs(pipeline) do
    print(step[1])
    step[2](config)
  end
  print("\nDone!\n")
end
-- Script entry point: parse the command line, then build one ebook, or one
-- per JSON file in the current directory when "." is given as the config.
local positional_arguments = {"json_file_name", "action"}
local arguments = argparse(arg, positional_arguments)
if not arguments then
  -- argparse returns nil after handling a help flag; there is nothing to run
  return
end
if not arguments.json_file_name then
  print(help)
  error("\nA config file name/path must be specified.")
end
if arguments.json_file_name == "." then
  utility.ls(".")(function(file_name)
    -- the dot is escaped so only a real ".json" extension matches
    if file_name:find("%.json$") then
      arguments.json_file_name = file_name
      main(arguments)
    end
  end)
else
  main(arguments)
end

View File

@ -1,51 +1,21 @@
#!/usr/bin/env luajit #!/usr/bin/env luajit
-- if utility-functions.lua has an error, this won't show it, so for testing purposes, I don't use it here local success, utility = pcall(function()
-- local error_occurred, utility = pcall(function() return dofile(arg[0]:match("@?(.*/)") or arg[0]:match("@?(.*\\)") .. "utility-functions.lua") end) if not error_occurred then error("\n\nThis script is installed improperly. Follow instructions at:\n\thttps://github.com/TangentFoxy/.lua-files#installation\n") end return dofile((arg[0]:match("@?(.*/)") or arg[0]:match("@?(.*\\)")) .. "utility-functions.lua")
utility = dofile(arg[0]:match("@?(.*/)") or arg[0]:match("@?(.*\\)") .. "utility-functions.lua") end)
if not success then
print("---") print("\n\n" .. tostring(utility))
error("\n\nThis script may be installed improperly. Follow instructions at:\n\thttps://github.com/TangentFoxy/.lua-files#installation\n")
local commands = {
"llm run dolphin-mixtral \"How are you?\"",
"llm run curt \"How are you?\"",
"llm run curt2 \"How are you?\"",
"llm run synx \"How are you?\"",
"llm run synx \"Describe actions you would take as a synx.\"",
"llm run synx \"Describe a synx.\"",
"llm run synx \"What are you?\""
}
-- local llm = loadfile(utility.path .. "llm.lua")
for _, command in ipairs(commands) do
-- print("\n\n\nTEST START", command .. "\n\n\n")
-- print(command:rep(5, "\n"))
for i = 1, 5 do
-- os.execute(command)
-- loadfile(utility.path .. "llm.lua")(command:sub(5))
-- command = command:sub(5)
-- local tab = {}
-- for argument in command:gmatch("%S+") do
-- table.insert(tab, argument)
-- end
-- llm(unpack(tab))
-- print("\nOUTPUT ENDS\n")
-- error("\n\ntmp break\n\n")
-- print(command)
os.execute("echo " .. command .. " >> .run-this-shit.ps1")
end
end end
-- os.execute("echo " .. commands[1] .. " >> .run-this-shit.ps1") local htmlparser = utility.require("htmlparser")
os.execute("pwsh .run-this-shit.ps1") utility.open("TEST.html", "r")(function(html_file)
os.execute("rm .run-this-shit.ps1") local raw_html = html_file:read("*all")
print("---") local parser = htmlparser.parse(raw_html)
local content_tag = parser:select(".article > div > div") -- TODO add ability to set selector in config!
local text = content_tag[1]:getcontent()
local title_tag = parser:select(".headline")
print(title_tag[1]:gettext())
end)

View File

@ -1,14 +1,30 @@
-- TO USE, PUT THE INTERIOR OF THIS FUNCTION IN YOUR FILE
-- this only works if that file is in the same directory as this one - but works no matter where it was called from
-- Template only: `success` and `utility` are local to this function, so
-- calling it does not make `utility` available to the caller. Copy the body
-- into the script that needs it.
local function _example_load()
-- the load runs under pcall so a failure can be reported with install instructions
local success, utility = pcall(function()
return dofile((arg[0]:match("@?(.*/)") or arg[0]:match("@?(.*\\)")) .. "utility-functions.lua")
end)
if not success then
print("\n\n" .. tostring(utility))
error("\n\nThis script may be installed improperly. Follow instructions at:\n\thttps://github.com/TangentFoxy/.lua-files#installation\n")
end
end
math.randomseed(os.time()) math.randomseed(os.time())
local utility = {} local utility = {}
if package.config:sub(1, 1) == "\\" then if package.config:sub(1, 1) == "\\" then
utility.OS = "Windows" utility.OS = "Windows"
utility.path_separator = "\\"
utility.recursive_remove_command = "rmdir /s /q "
else else
utility.OS = "UNIX-like" utility.OS = "UNIX-like"
utility.path_separator = "/"
utility.recursive_remove_command = "rm -r "
end end
utility.path = arg[0]:match("@?(.*/)") or arg[0]:match("@?(.*\\)") -- related to discussion in https://stackoverflow.com/q/6380820 utility.path = arg[0]:match("@?(.*/)") or arg[0]:match("@?(.*\\)") -- inspired by discussion in https://stackoverflow.com/q/6380820
-- always uses outputting to a temporary file to guarantee safety -- always uses outputting to a temporary file to guarantee safety
function os.capture_safe(command, tmp_file_name) function os.capture_safe(command, tmp_file_name)
@ -39,6 +55,43 @@ function string.trim(s)
return s:match'^()%s*$' and '' or s:match'^%s*(.*%S)' return s:match'^()%s*$' and '' or s:match'^%s*(.*%S)'
end end
-- Returns `s` wrapped in double quotes. Quotes already inside `s` are not
-- escaped.
function string.enquote(s)
  local quote = "\""
  return quote .. s .. quote
end
-- Escapes every Lua pattern magic character in `s` with a leading "%", so the
-- result matches `s` literally when used as a pattern.
-- Returns nothing when `s` is nil.
local function escape_special_characters(s)
  if s == nil then return end
  local escaped = s:gsub("[()%%.[^$%]*+%-?]", "%%%1")
  return escaped
end
-- Returns an iterator (not a table) over the pieces of `s` separated by
-- `delimiter`, which is matched literally and defaults to ",".
-- A delimiter is appended when `s` does not already end with one, so the last
-- piece is produced too.
function string.gsplit(s, delimiter)
  delimiter = delimiter or ","
  local ends_with_delimiter = s:sub(-#delimiter) == delimiter
  if not ends_with_delimiter then
    s = s .. delimiter
  end
  local piece_pattern = "(.-)" .. escape_special_characters(delimiter)
  return s:gmatch(piece_pattern)
end
-- Splits `s` on `delimiter` and returns the pieces as a list. This is the
-- table-returning counterpart of string.gsplit.
function string.split(s, delimiter)
  local pieces = {}
  local count = 0
  for piece in s:gsplit(delimiter) do
    count = count + 1
    pieces[count] = piece
  end
  return pieces
end
-- Loads <name>.lua from the directory the running script lives in and returns
-- whatever that file returns. Unlike the built-in require, the file is run
-- again on every call. On failure the underlying error is printed and an
-- error pointing at the installation instructions is raised.
utility.require = function(name)
  local function load_sibling()
    local script_dir = arg[0]:match("@?(.*/)") or arg[0]:match("@?(.*\\)")
    return dofile(script_dir .. name .. ".lua")
  end

  local loaded, package_or_err = pcall(load_sibling)
  if not loaded then
    print("\n\n" .. tostring(package_or_err))
    error("\n\nThis script may be installed improperly. Follow instructions at:\n\thttps://github.com/TangentFoxy/.lua-files#installation\n")
  end
  return package_or_err
end
-- errors if specified program isn't in the path
utility.required_program = function(name) utility.required_program = function(name)
local command local command
if utility.OS == "Windows" then if utility.OS == "Windows" then
@ -67,6 +120,28 @@ utility.tmp_file_name = function()
return "." .. utility.uuid() .. ".tmp" return "." .. utility.uuid() .. ".tmp"
end end
-- Returns `file_name` with characters that are unsafe in file names removed,
-- ampersands replaced by commas, and every run of whitespace collapsed to a
-- single space.
utility.make_safe_file_name = function(file_name)
  file_name = file_name:gsub("[%\"%:%\\%!%@%#%$%%%^%*%=%{%}%|%;%<%>%?%/]", "") -- everything except the &
  file_name = file_name:gsub(" %&", ",") -- replacing & with a comma works for 99% of things
  file_name = file_name:gsub("%&", ",") -- replacing & with a comma works for 99% of things
  -- "%s+" matches a run of whitespace; the former "[%s+]" was a character
  -- class (one whitespace character or a literal "+") and never collapsed runs
  file_name = file_name:gsub("%s+", " ")
  return file_name
end
-- io.open, but failures are raised immediately and the file is closed for you.
--   file_name, mode:      passed to io.open
--   custom_error_message: raised instead of io.open's message when the file
--                         cannot be opened; it is not used for errors raised
--                         by your own callback
-- Returns a function that takes a callback, calls it with the open file,
-- closes the file whether or not the callback failed, and then returns the
-- callback's first result or re-raises its error.
utility.open = function(file_name, mode, custom_error_message)
  local handle, open_error = io.open(file_name, mode)
  if not handle then
    error(custom_error_message or open_error)
  end

  return function(fn)
    local ok, outcome = pcall(fn, handle)
    handle:close()
    if ok then
      return outcome
    end
    error(outcome) -- this function should not hide *your* errors
  end
end
utility.escape_quotes = function(input) utility.escape_quotes = function(input)
-- the order of these commands is important and must be preserved -- the order of these commands is important and must be preserved
input = input:gsub("\\", "\\\\") input = input:gsub("\\", "\\\\")
@ -74,6 +149,7 @@ utility.escape_quotes = function(input)
return input return input
end end
-- Example, print all items in this directory: utility.ls(".")(print)
utility.ls = function(path) utility.ls = function(path)
local command local command
if utility.OS == "Windows" then if utility.OS == "Windows" then

15
utility-quickref-test.lua Normal file
View File

@ -0,0 +1,15 @@
-- Manual smoke test for utility-functions.lua: loads the library, then prints
-- a few of its values and the contents of the current directory.
local success, utility = pcall(function()
return dofile((arg[0]:match("@?(.*/)") or arg[0]:match("@?(.*\\)")) .. "utility-functions.lua")
end)
if not success then
print("\n\n" .. tostring(utility))
error("\n\nThis script may be installed improperly. Follow instructions at:\n\thttps://github.com/TangentFoxy/.lua-files#installation\n")
end
print(utility.path)
print(utility.OS)
print(utility.uuid())
print("\n")
-- utility.ls returns a function that calls the given callback once per entry
utility.ls(".")(print)

View File

@ -16,8 +16,6 @@ local help = [[Usage:
on each. on each.
<url>: Source. YouTube URL expected, but should work with anything <url>: Source. YouTube URL expected, but should work with anything
yt-dlp works with. yt-dlp works with.
IMPORTANT: Expects Firefox to be installed with a YouTube account logged in to get cookies from.
]] ]]
if os.execute("where yt-dlp") ~= 0 then if os.execute("where yt-dlp") ~= 0 then
@ -39,18 +37,22 @@ else
-- "--file" is handled just before execution -- "--file" is handled just before execution
end end
local core_command = "yt-dlp --retries 100 "
local metadata_options = "--write-sub --write-auto-sub --sub-lang \"en.*\" --write-thumbnail --write-description "
local quality_ceiling_720 = "-f \"bestvideo[height<=720]+bestaudio/best[height<=720]\" "
local execute = { local execute = {
backup = function(url) backup = function(url)
os.execute("yt-dlp --retries 100 --write-sub --write-auto-sub --sub-lang \"en.*\" --write-thumbnail --write-description -f \"bestvideo[height<=720]+bestaudio/best[height<=720]\" \"" .. url .."\" --cookies-from-browser \"firefox\"") os.execute(core_command .. metadata_options .. quality_ceiling_720 .. url:enquote())
end, end,
music = function(url) music = function(url)
os.execute("yt-dlp --retries 100 -x --audio-quality 0 \"" .. url .."\" --cookies-from-browser \"firefox\"") os.execute(core_command .. "-x --audio-quality 0 " .. url:enquote())
end, end,
metadata = function(url) metadata = function(url)
os.execute("yt-dlp --retries 100 --write-sub --write-auto-sub --sub-lang \"en.*\" --write-thumbnail --write-description --skip-download \"" .. url .."\" --cookies-from-browser \"firefox\"") os.execute(core_command .. metadata_options .. "--skip-download " .. url:enquote())
end, end,
video = function(url) video = function(url)
os.execute("yt-dlp --retries 100 -f \"bestvideo[height<=720]+bestaudio/best[height<=720]\" \"" .. url .. "\" --cookies-from-browser \"firefox\"") os.execute(core_command .. quality_ceiling_720 .. url:enquote())
end, end,
} }
execute.clone = execute.backup execute.clone = execute.backup