mirror of
https://github.com/TangentFoxy/.lua-files.git
synced 2024-11-20 21:34:23 +00:00
Compare commits
36 Commits
1e09fb54bd
...
15013d02fe
Author | SHA1 | Date | |
---|---|---|---|
15013d02fe | |||
72a474e7cf | |||
c3733589a3 | |||
942a47330e | |||
33f9a123e6 | |||
f593a3c05b | |||
98365ff861 | |||
98a603e647 | |||
599d5f0612 | |||
d33dc72f12 | |||
9eb20662ce | |||
fd4a164e95 | |||
ee6fab1039 | |||
217c39f53f | |||
063437b596 | |||
3c648fbc85 | |||
257986fd5a | |||
b4a5b9ccad | |||
6b82e4e8d6 | |||
811e08f963 | |||
e9a155cf04 | |||
eeffe27258 | |||
d58f161e9e | |||
62b9574b56 | |||
dec4e24bf4 | |||
e1ef52ad69 | |||
b3d10e9b6f | |||
e14313cff0 | |||
180e979061 | |||
6fe135e278 | |||
633dc71143 | |||
7ceb73f9a5 | |||
4760828063 | |||
3ee0adbd48 | |||
defbee8e5c | |||
20dda8d1ee |
8
2pdf.lua
8
2pdf.lua
@ -3,7 +3,13 @@
|
|||||||
-- The first time this is run (on Windows), a dialog will appear.
|
-- The first time this is run (on Windows), a dialog will appear.
|
||||||
-- Uncheck the "always show this" thing and click Install.
|
-- Uncheck the "always show this" thing and click Install.
|
||||||
|
|
||||||
local error_occurred, utility = pcall(function() return dofile(arg[0]:match("@?(.*/)") or arg[0]:match("@?(.*\\)") .. "utility-functions.lua") end) if not error_occurred then error("\n\nThis script is installed improperly. Follow instructions at:\n\thttps://github.com/TangentFoxy/.lua-files#installation\n") end
|
local success, utility = pcall(function()
|
||||||
|
return dofile((arg[0]:match("@?(.*/)") or arg[0]:match("@?(.*\\)")) .. "utility-functions.lua")
|
||||||
|
end)
|
||||||
|
if not success then
|
||||||
|
print("\n\n" .. tostring(utility))
|
||||||
|
error("\n\nThis script may be installed improperly. Follow instructions at:\n\thttps://github.com/TangentFoxy/.lua-files#installation\n")
|
||||||
|
end
|
||||||
|
|
||||||
local for_files = utility.ls()
|
local for_files = utility.ls()
|
||||||
os.execute("mkdir 2pdf-output")
|
os.execute("mkdir 2pdf-output")
|
||||||
|
@ -16,7 +16,14 @@ if arg[1] and arg[1]:find("help") then
|
|||||||
return false
|
return false
|
||||||
end
|
end
|
||||||
|
|
||||||
local error_occurred, utility = pcall(function() return dofile(arg[0]:match("@?(.*/)") or arg[0]:match("@?(.*\\)") .. "utility-functions.lua") end) if not error_occurred then error("\n\nThis script is installed improperly. Follow instructions at:\n\thttps://github.com/TangentFoxy/.lua-files#installation\n") end
|
local success, utility = pcall(function()
|
||||||
|
return dofile((arg[0]:match("@?(.*/)") or arg[0]:match("@?(.*\\)")) .. "utility-functions.lua")
|
||||||
|
end)
|
||||||
|
if not success then
|
||||||
|
print("\n\n" .. tostring(utility))
|
||||||
|
error("\n\nThis script may be installed improperly. Follow instructions at:\n\thttps://github.com/TangentFoxy/.lua-files#installation\n")
|
||||||
|
end
|
||||||
|
|
||||||
utility.required_program("ffmpeg")
|
utility.required_program("ffmpeg")
|
||||||
|
|
||||||
local threads = tonumber(arg[1]) or arg[1] or 1
|
local threads = tonumber(arg[1]) or arg[1] or 1
|
||||||
|
9
720p.lua
9
720p.lua
@ -19,7 +19,14 @@ if arg[1] and arg[1]:find("help") then
|
|||||||
return false
|
return false
|
||||||
end
|
end
|
||||||
|
|
||||||
local error_occurred, utility = pcall(function() return dofile(arg[0]:match("@?(.*/)") or arg[0]:match("@?(.*\\)") .. "utility-functions.lua") end) if not error_occurred then error("\n\nThis script is installed improperly. Follow instructions at:\n\thttps://github.com/TangentFoxy/.lua-files#installation\n") end
|
local success, utility = pcall(function()
|
||||||
|
return dofile((arg[0]:match("@?(.*/)") or arg[0]:match("@?(.*\\)")) .. "utility-functions.lua")
|
||||||
|
end)
|
||||||
|
if not success then
|
||||||
|
print("\n\n" .. tostring(utility))
|
||||||
|
error("\n\nThis script may be installed improperly. Follow instructions at:\n\thttps://github.com/TangentFoxy/.lua-files#installation\n")
|
||||||
|
end
|
||||||
|
|
||||||
utility.required_program("ffmpeg")
|
utility.required_program("ffmpeg")
|
||||||
|
|
||||||
local tune
|
local tune
|
||||||
|
101
README.md
101
README.md
@ -1,5 +1,10 @@
|
|||||||
# .lua-files
|
# .lua-files
|
||||||
It's like dotfiles, but no, it's just Lua scripts I find useful.
|
Personally convenient Lua scripts to add to my path.
|
||||||
|
|
||||||
|
## Installation
|
||||||
|
1. Put this folder somewhere.
|
||||||
|
2. Add that folder to your path. (On Windows, search for Environment Variables (it's "part of" Control Panel) and use the UI to add the folder to System variables.)
|
||||||
|
3. (On Windows) Add `.LUA` to PATHEXT.
|
||||||
|
|
||||||
## Scripts
|
## Scripts
|
||||||
- `2webm.lua`: Converts everything in the working directory to .webm files.
|
- `2webm.lua`: Converts everything in the working directory to .webm files.
|
||||||
@ -10,7 +15,93 @@ It's like dotfiles, but no, it's just Lua scripts I find useful.
|
|||||||
- `utility-functions.lua`: (Library) Required for many of these scripts to run.
|
- `utility-functions.lua`: (Library) Required for many of these scripts to run.
|
||||||
- `video-dl.lua`: A few premade command lines for using `yt-dlp` to download what I want quicker.
|
- `video-dl.lua`: A few premade command lines for using `yt-dlp` to download what I want quicker.
|
||||||
|
|
||||||
## Installation
|
### make-epub.lua
|
||||||
1. Put this folder somewhere.
|
This script is only intended for personal use. Do not use it to infringe on copyright.
|
||||||
2. Add that somewhere to your path. (On Windows, search for Environment Variables (it's "part of" Control Panel) and use the UI to add them to System variables.)
|
|
||||||
3. (On Windows) Add `.LUA` to PATHEXT.
|
```
|
||||||
|
Usage:
|
||||||
|
|
||||||
|
make-epub.lua <config (JSON file)> [action]
|
||||||
|
|
||||||
|
If "." is used instead of a JSON file, every JSON file in the current directory
|
||||||
|
will be used to make multiple ebooks back-to-back.
|
||||||
|
|
||||||
|
[action]: If not specified, all steps will be taken in order (except cleanall).
|
||||||
|
download: All pages will be downloaded to their own HTML files.
|
||||||
|
convert: Each page is converted to Markdown.
|
||||||
|
concat: A file is created for each section out of its pages.
|
||||||
|
markdown: Metadata frontmatter and Markdown section files will be
|
||||||
|
concatenated into a single Markdown file.
|
||||||
|
epub: Markdown file will be converted to an ePub using pandoc.
|
||||||
|
cleanpage: All page files will be deleted, along with their extra
|
||||||
|
directories.
|
||||||
|
cleanall: Deletes everything except the config file and ePub.
|
||||||
|
|
||||||
|
Requirements:
|
||||||
|
- Binaries: pandoc, curl
|
||||||
|
```
|
||||||
|
|
||||||
|
The JSON config spec has two major variations ("Book" and "Anthology").
|
||||||
|
|
||||||
|
The following is shared:
|
||||||
|
- `authors`: (Optional) Array of Strings: Author names. First in the list is used as a byline in the final output. (Legacy: An `author` string works as well. If this exists, it will be first.)
|
||||||
|
- `title`: (Optional) String: Title of book.
|
||||||
|
- `base_file_name`: (Optional) String: Alternate final file name. (Default: "`title` by `author`" or just "`title`".)
|
||||||
|
- `keywords`: Array of Strings: Keywords/Tags. (I'm not sure what the difference is in the final output so it goes in both.)
|
||||||
|
- `sections`: **See "Book"/"Anthology" variations.** (I call LitErotica's stories sections - because they are often part of a larger whole.)
|
||||||
|
- `section_titles`: (Optional) Array of Strings: The titles to be used for Table of Contents / headings. (If `sections.naming` is specified, `section_titles` will be ignored.)
|
||||||
|
- `extract_titles`: (Optional) Boolean: Titles will be extracted from the first page of every section. (Note: This is compatible with `sections.automatic_naming`, but it can create repeated titles.)
|
||||||
|
- `lazy_titling`: (Optional) Boolean: URLs will be used to generate section titles. (Warning: This process is likely to create janky titles. Note: This is compatible with `sections.automatic_naming`, but it can create repeated titles.)
|
||||||
|
- `page_counts`: Array of Integers: The number of pages on LitErotica per "story". (I call them sections because this script was made to put together story series originally.)
|
||||||
|
|
||||||
|
#### Variation: Book
|
||||||
|
- `base_url`: String: A partial URL that is the beginning of the URL used for each section (story) on LitErotica. (This script currently only works for stories that end in a padded two-digit number.) (Technically optional if `first_section_url` is specified, and `sections.start` and `sections.finish` are both `1`.)
|
||||||
|
- `first_section_url`: (Optional) String: Some stories don't have the same URL structure for their first section. This allows you to specify its full URL.
|
||||||
|
- `sections`: Object defining which sections to download, and what to call them (e.g. Chapters, Parts, ...).
|
||||||
|
- `start`: (Optional) Number: Where to start. (`1` is the default, since it is the most common.)
|
||||||
|
- `finish`: Number: Where to end.
|
||||||
|
- `naming`: (Optional) String: How to name sections in the final output. The result is `[naming] [#]` (using section numbers). (If not specified, sections will not have headings.)
|
||||||
|
- `automatic_naming`: (Optional) Boolean: If any line matches "Prologue" or "Chapter #" (any number), it will be made into a heading. (Note: This does not override `naming`. Both can be used together.) (Other patterns will be added as I find them.)
|
||||||
|
|
||||||
|
Example:
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"authors": ["Name"],
|
||||||
|
"title": "Book",
|
||||||
|
"base_file_name": "Book",
|
||||||
|
"keywords": ["erotica", "fantasy"],
|
||||||
|
"base_url": "https://www.literotica.com/s/title-ch-",
|
||||||
|
"first_section_url": "https://www.literotica.com/s/title",
|
||||||
|
"sections": {
|
||||||
|
"start": 1,
|
||||||
|
"finish": 4,
|
||||||
|
"naming": "Chapter",
|
||||||
|
"automatic_naming": true
|
||||||
|
},
|
||||||
|
"page_counts": [1, 5, 3, 3]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
#### Variation: Anthology
|
||||||
|
- `manually_specified_sections`: (Optional) Boolean, must be `true`. Technically not required as the script is capable of figuring out you are using this variation, but *should be* included.
|
||||||
|
- `sections`: Array of Strings: A complete URL for each story.
|
||||||
|
- `section_titles`: (**Required**) Array of Strings: The titles to be used for Table of Contents / headings. (Must be in the same order as `sections`.)
|
||||||
|
|
||||||
|
Example:
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"authors": ["Name"],
|
||||||
|
"title": "Anthology",
|
||||||
|
"keywords": ["LitErotica", "erotica"],
|
||||||
|
"manually_specified_sections": true,
|
||||||
|
"sections": [
|
||||||
|
"https://www.literotica.com/s/unique-title",
|
||||||
|
"https://www.literotica.com/s/another-title"
|
||||||
|
],
|
||||||
|
"section_titles": [
|
||||||
|
"Unique Title",
|
||||||
|
"Another Title"
|
||||||
|
],
|
||||||
|
"page_counts": [5, 2]
|
||||||
|
}
|
||||||
|
```
|
||||||
|
272
htmlparser.lua
Normal file
272
htmlparser.lua
Normal file
@ -0,0 +1,272 @@
|
|||||||
|
-- vim: ft=lua ts=2 sw=2
|
||||||
|
|
||||||
|
-- Syntactic Sugar {{{
|
||||||
|
-- rine: "Return If Not Empty".
-- Hands `val` back when it is truthy and its length (#val) is above zero.
-- A falsy `val` is returned untouched (nil stays nil, false stays false);
-- a truthy value whose length is zero yields false.
local function rine(val)
  if not val then
    return val
  end
  if #val > 0 then
    return val
  end
  return false
end
|
||||||
|
-- rit: "Return If Table".
-- Gives back `a` itself when it is a table; every other value produces false.
local function rit(a)
  if type(a) ~= "table" then
    return false
  end
  return a
end
|
||||||
|
-- Placeholder used wherever a logging channel is switched off.
local noop = function() end
-- Prefix every Lua-pattern magic character in `s` with "%".
-- The gsub call is parenthesised so that only the escaped string is returned;
-- gsub's second result (the substitution count) would otherwise leak into
-- whatever argument list this call ends.
local esc = function(s) return (string.gsub(s, "([%^%$%(%)%%%.%[%]%*%+%-%?])", "%%" .. "%1")) end
local str = tostring
local char = string.char
local opts = rit(htmlparser_opts) or {} -- needed for silent/noerr/noout/nonl directives, also needed to be defined before `require` in such case
-- prn(l, f, ...): write one log line, "[HTMLParser] [<L>] <formatted message>".
--   l   : channel letter; "i" goes to stdout, anything else to stderr
--   f   : string.format template, filled with the remaining arguments
-- Line terminator: "\n" unless opts.nonl is set. A string (or number) in
-- opts.nonl is appended as-is, exactly as before; any other truthy value
-- (e.g. `true`) now means "append nothing" instead of raising
-- "attempt to concatenate a boolean value".
local prn = opts.silent and noop or function(l,f,...)
  local fd = (l=="i") and "stdout" or "stderr"
  local t = (" [%s] "):format(l:upper())
  local eol = opts.nonl
  if not eol then
    eol = "\n"
  elseif type(eol) ~= "string" and type(eol) ~= "number" then
    eol = ""
  end
  io[fd]:write('[HTMLParser]'..t..f:format(...)..eol)
end
-- Error and info channels; each can be disabled independently of `silent`.
local err = opts.noerr and noop or function(f,...) prn("e",f,...) end
local out = opts.noout and noop or function(f,...) prn("i",f,...) end
-- Current line number of the function at stack level `lvl` (default 2);
-- degrades to a no-op when the debug library is not available.
local line = debug and function(lvl) return debug.getinfo(lvl or 2).currentline end or noop
-- Debug channel (only when opts.debug is set): "#LINE#" in the template is
-- replaced with the line number reported for stack level 3.
local dbg = opts.debug and function(f,...) prn("d",f:gsub("#LINE#",str(line(3))),...) end or noop
|
||||||
|
-- }}}
|
||||||
|
-- Requires {{{
|
||||||
|
|
||||||
|
-- MODIFIED --
|
||||||
|
|
||||||
|
-- local ElementNode = require"htmlparser.ElementNode"
|
||||||
|
-- local voidelements = require"htmlparser.voidelements"
|
||||||
|
-- Load utility-functions.lua from the directory of the running script.
-- arg[0] is searched for a directory prefix ending in "/" and, failing that,
-- in "\". When it contains neither (the script was started by bare file name),
-- fall back to an empty prefix so the file is looked up relative to the
-- current directory instead of failing on a nil concatenation.
local success, utility = pcall(function()
  local script_dir = arg[0]:match("@?(.*/)") or arg[0]:match("@?(.*\\)") or ""
  return dofile(script_dir .. "utility-functions.lua")
end)
if not success then
  -- On failure `utility` holds the error raised inside pcall; show it before aborting.
  print("\n\n" .. tostring(utility))
  error("\n\nThis script may be installed improperly. Follow instructions at:\n\thttps://github.com/TangentFoxy/.lua-files#installation\n")
end

-- The parser's two companion modules are obtained through the project's own
-- utility.require helper (defined in utility-functions.lua, not visible here).
local ElementNode = utility.require("htmlparser/ElementNode")
local voidelements = utility.require("htmlparser/voidelements")
|
||||||
|
|
||||||
|
-- MODIFIED --
|
||||||
|
|
||||||
|
--}}}
|
||||||
|
local HtmlParser = {}

-- parse(text, limit): build an ElementNode tree from HTML source.
--   text  : the markup; it is converted with tostring() before use
--   limit : optional loop cap; defaults to opts.looplimit, then to 1000.
--           The same cap is applied to the main loop, to the attribute loop of
--           each tag and to the closing-tag loop of each tag.
-- Returns the root ElementNode (created with index 0 and the full text).
-- Options read here: keep_comments, keep_danger_placeholders,
-- tpl_skip_pattern, tpl_marker_pattern, looplimit; the globals
-- htmlparser_opts and htmlparser_looplimit are consulted as well.
local function parse(text,limit) -- {{{
  -- NOTE(review): rine() tests `#val > 0`; for an options table that only has
  -- named keys the length is 0, so the first alternative looks like it never
  -- applies and the global htmlparser_opts (or {}) is what gets used — confirm.
  local opts = rine(opts) -- use top-level opts-table (the one, defined before requiring the module), if exists
    or rit(htmlparser_opts) -- or defined after requiring (but before calling `parse`)
    or {} -- fallback otherwise
  opts.looplimit = opts.looplimit or htmlparser_looplimit

  local text = str(text)
  local limit = limit or opts.looplimit or 1000
  local tpl = false -- set to true by g() once any "<"/">" has been swapped for a placeholder byte

  if not opts.keep_comments then -- Strip (or not) comments {{{
    text = text:gsub("<!%-%-.-%-%->","") -- Many chances commented code will have syntax errors, that'll lead to parser failures
  end -- }}}

  local tpr={} -- maps "<" and ">" to the placeholder bytes chosen below

  if not opts.keep_danger_placeholders then -- {{{ little speedup by cost of potential parsing breakages
    -- search unused "invalid" bytes {{{
    -- Walks byte values 0..31 and then 127..254 (the jump happens at i==31)
    -- looking for two bytes that do not occur in the text.
    local busy,i={},0;
    repeat -- {{{
      local cc = char(i)
      if not(text:match(cc)) then -- {{{
        if not(tpr["<"]) or not(tpr[">"]) then -- {{{
          if not(busy[i]) then -- {{{
            if not(tpr["<"]) then -- {{{
              tpr["<"] = cc;
            elseif not(tpr[">"]) then
              tpr[">"] = cc;
            end -- }}}
            busy[i] = true
            -- NOTE(review): `c` is not defined anywhere in this function, so it reads as a nil global in the debug calls that mention it.
            dbg("c:{%s}||cc:{%d}||tpr[c]:{%s}",str(c),cc:byte(),str(tpr[c]))
            dbg("busy[i]:{%s},i:{%d}",str(busy[i]),i)
            dbg("[FindPH]:#LINE# Success! || i=%d",i)
          else -- if !busy
            dbg("[FindPH]:#LINE# Busy! || i=%d",i)
          end -- if !busy -- }}}
          dbg("c:{%s}||cc:{%d}||tpr[c]:{%s}",c,cc:byte(),str(tpr[c]))
          dbg("%s",str(busy[i]))
        else -- if < or >
          dbg("[FindPH]:#LINE# Done!",i)
          break
        end -- if < or > -- }}}
      else -- text!match(cc)
        dbg("[FindPH]:#LINE# Text contains this byte! || i=%d",i)
      end -- text!match(cc) -- }}}
      local skip=1
      if i==31 then
        skip=96 -- ASCII
      end
      i=i+skip
    until (i==255) -- }}}
    i=nil
    --- }}}

    if not(tpr["<"]) or not(tpr[">"]) then
      err("Impossible to find at least two unused byte codes in this HTML-code. We need it to escape bracket-contained placeholders inside tags.")
      err("Consider enabling 'keep_danger_placeholders' option (to silence this error, if parser wasn't failed with current HTML-code) or manually replace few random bytes, to free up the codes.")
    else
      dbg("[FindPH]:#LINE# Found! || '<'=%d, '>'=%d",tpr["<"]:byte(),tpr[">"]:byte())
    end

    -- dbg("tpr[>] || tpr[] || #busy%d")

    -- g {{{
    -- gsub callback helper: receives the captures of one match, rewrites
    -- capture number `id` by mapping each of its characters through `tpr`,
    -- and returns all captures concatenated back together.
    local function g(id,...)
      local arg={...}
      local orig=arg[id]
      arg[id]=arg[id]:gsub("(.)",tpr)
      if arg[id] ~= orig then
        tpl=true
        dbg("[g]:#LINE# orig: %s", str(orig))
        dbg("[g]:#LINE# replaced: %s",str(arg[id]))
      end
      dbg("[g]:#LINE# called, id: %s, arg[id]: %s, args { "..(("{%s}, "):rep(#arg):gsub(", $","")).." }",id,arg[id],...)
      dbg("[g]:#LINE# concat(arg): %s",table.concat(arg))
      return table.concat(arg)
    end
    -- g }}}

    -- tpl-placeholders and attributes {{{
    text=text
      :gsub(
        "(=[%s]-)".. -- only match attr.values, and not random strings between two random apostrophes
        "(%b'')",
        function(...)return g(2,...)end
      )
      :gsub(
        "(=[%s]-)".. -- same for "
        '(%b"")',
        function(...)return g(2,...)end
      ) -- Escape "<"/">" inside attr.values (see issue #50)
      :gsub(
        "(<".. -- Match "<",
        (opts.tpl_skip_pattern or "[^!]").. -- with exclusion pattern (for example, to ignore comments, which aren't template placeholders, but can legally contain "<"/">" inside.
        ")([^>]+)".. -- If matched, we want to escape '<'s if we meet them inside tag
        "(>)",
        function(...)return g(2,...)end
      )
      :gsub(
        "("..
        (tpr["<"] or "__FAILED__").. -- Here we search for "<", we escaped in previous gsub (and don't break things if we have no escaping replacement)
        ")("..
        (opts.tpl_marker_pattern or "[^%w%s]").. -- Capture templating symbol
        ")([%g%s]-)".. -- match placeholder's content
        "(%2)(>)".. -- placeholder's tail
        "([^>]*>)", -- remainder
        function(...)return g(5,...)end
      )
    -- }}}
  end -- }}}

  local index = 0
  local root = ElementNode:new(index, str(text))
  -- node: most recently opened tag (or the parent of the last closed one);
  -- descend: whether the next tag is created as a child of `node`;
  -- tpos: scan position in root._text; opentags: per-name stacks of open tags.
  local node, descend, tpos, opentags = root, true, 1, {}

  while true do -- MainLoop {{{
    if index == limit then -- {{{
      err("Main loop reached loop limit (%d). Consider either increasing it or checking HTML-code for syntax errors", limit)
      break
    end -- }}}
    -- openstart/tpos Definitions {{{
    local openstart, name
    openstart, tpos, name = root._text:find(
      "<" .. -- an uncaptured starting "<"
      "([%w-]+)" .. -- name = the first word, directly following the "<"
      "[^>]*>", -- include, but not capture everything up to the next ">"
      tpos)
    dbg("[MainLoop]:#LINE# openstart=%s || tpos=%s || name=%s",str(openstart),str(tpos),str(name))
    -- }}}
    if not name then break end -- no further opening tag: parsing is finished
    -- Some more vars {{{
    index = index + 1
    local tag = ElementNode:new(index, str(name), (node or {}), descend, openstart, tpos)
    node = tag
    local tagloop
    local tagst, apos = tag:gettext(), 1 -- tagst: text of the opening tag; apos: scan position inside it
    -- }}}
    while true do -- TagLoop {{{
      dbg("[TagLoop]:#LINE# tag.name=%s, tagloop=%s",str(tag.name),str(tagloop))
      if tagloop == limit then -- {{{
        err("Tag parsing loop reached loop limit (%d). Consider either increasing it or checking HTML-code for syntax errors", limit)
        break
      end -- }}}
      -- Attrs {{{
      local start, k, eq, quote, v, zsp
      start, apos, k, zsp, eq, zsp, quote = tagst:find(
        "%s+" .. -- some uncaptured space
        "([^%s=/>]+)" .. -- k = an unspaced string up to an optional "=" or the "/" or ">"
        "([%s]-)".. -- zero or more spaces
        "(=?)" .. -- eq = the optional; "=", else ""
        "([%s]-)".. -- zero or more spaces
        [=[(['"]?)]=], -- quote = an optional "'" or '"' following the "=", or ""
        apos)
      dbg("[TagLoop]:#LINE# start=%s || apos=%s || k=%s || zsp='%s' || eq='%s', quote=[%s]",str(start),str(apos),str(k),str(zsp),str(eq),str(quote))
      -- }}}
      if not k or k == "/>" or k == ">" then break end
      -- Pattern {{{
      if eq == "=" then
        local pattern = "=([^%s>]*)" -- unquoted value: everything up to whitespace or ">"
        if quote ~= "" then
          pattern = quote .. "([^" .. quote .. "]*)" .. quote -- quoted value: everything up to the matching quote
        end
        start, apos, v = tagst:find(pattern, apos)
        dbg("[TagLoop]:#LINE# start=%s || apos=%s || v=%s || pattern=%s",str(start),str(apos),str(v),str(pattern))
      end
      -- }}}
      v=v or ""
      if tpl then -- {{{
        -- turn the placeholder bytes inside the value back into "<" / ">"
        for rk,rv in pairs(tpr) do
          v = v:gsub(rv,rk)
          dbg("[TagLoop]:#LINE# rv=%s || rk=%s",str(rv),str(rk))
        end
      end -- }}}

      dbg("[TagLoop]:#LINE# k=%s || v=%s",str(k),str(v))
      tag:addattribute(k, v)
      tagloop = (tagloop or 0) + 1
    end
    -- }}}
    if voidelements[tag.name:lower()] then -- {{{
      -- void elements are closed immediately and never become a parent
      descend = false
      tag:close()
    else
      descend = true
      opentags[tag.name] = opentags[tag.name] or {}
      table.insert(opentags[tag.name], tag)
    end
    -- }}}
    local closeend = tpos
    local closingloop
    while true do -- TagCloseLoop {{{
      -- Can't remember why did I add that, so comment it for now (and not remove), in case it will be needed again
      -- (although, it causes #59 and #60, so it will anyway be needed to rework)
      -- if voidelements[tag.name:lower()] then break end -- already closed
      if closingloop == limit then
        err("Tag closing loop reached loop limit (%d). Consider either increasing it or checking HTML-code for syntax errors", limit)
        break
      end

      local closestart, closing, closename
      closestart, closeend, closing, closename = root._text:find("[^<]*<(/?)([%w-]+)", closeend)
      dbg("[TagCloseLoop]:#LINE# closestart=%s || closeend=%s || closing=%s || closename=%s",str(closestart),str(closeend),str(closing),str(closename))

      -- stop as soon as the next tag is not a closing one (or there is none)
      if not closing or closing == "" then break end

      tag = table.remove(opentags[closename] or {}) or tag -- kludges for the cases of closing void or non-opened tags
      closestart = root._text:find("<", closestart)
      dbg("[TagCloseLoop]:#LINE# closestart=%s",str(closestart))
      tag:close(closestart, closeend + 1)
      node = tag.parent
      descend = true
      closingloop = (closingloop or 0) + 1
    end -- }}}
  end -- }}}
  if tpl then -- {{{
    dbg("tpl")
    -- restore the original "<" / ">" characters in the stored text
    for k,v in pairs(tpr) do
      root._text = root._text:gsub(v,k)
    end
  end -- }}}
  return root
end -- }}}
HtmlParser.parse = parse
return HtmlParser
|
283
htmlparser/ElementNode.lua
Normal file
283
htmlparser/ElementNode.lua
Normal file
@ -0,0 +1,283 @@
|
|||||||
|
-- vim: ft=lua ts=2
|
||||||
|
--- Minimal set type used by ElementNode.
-- Members are stored as table keys mapped to `true`; the methods live on
-- `Set` itself and are reached through the metatable's __index.
local Set = {}
Set.mt = {__index = Set}

--- Create a set.
-- @param values nil (empty set), a Set (copied), a non-empty array-like table
--   (its elements become members), any other table (its keys become members),
--   or a single non-table value (becomes the only member).
-- @return a new Set
function Set:new(values)
  local instance = {}
  local isSet = getmetatable(values) == Set.mt
  if type(values) == "table" then
    if not isSet and #values > 0 then
      for _, v in ipairs(values) do
        instance[v] = true
      end
    else
      for k in pairs(values) do
        instance[k] = true
      end
    end
  elseif values ~= nil then
    instance = {[values] = true}
  end
  return setmetatable(instance, Set.mt)
end

--- Add `e` to the set (nil is ignored). Returns the set for chaining.
function Set:add(e)
  if e ~= nil then self[e] = true end
  return self
end

--- Remove `e` from the set (nil is ignored). Returns the set for chaining.
function Set:remove(e)
  if e ~= nil then self[e] = nil end
  return self
end

--- Return the members as an array; the order follows pairs() and is unspecified.
function Set:tolist()
  local res = {}
  for k in pairs(self) do
    table.insert(res, k)
  end
  return res
end

-- Union
Set.mt.__add = function (a, b)
  local res, a, b = Set:new(), Set:new(a), Set:new(b)
  for k in pairs(a) do res[k] = true end
  for k in pairs(b) do res[k] = true end
  return res
end

-- Subtraction
Set.mt.__sub = function (a, b)
  local res, a, b = Set:new(), Set:new(a), Set:new(b)
  for k in pairs(a) do res[k] = true end
  for k in pairs(b) do res[k] = nil end
  return res
end

-- Intersection
Set.mt.__mul = function (a, b)
  local res, a, b = Set:new(), Set:new(a), Set:new(b)
  for k in pairs(a) do
    -- rawget: a plain `b[k]` would fall through __index to the Set methods,
    -- so a member named e.g. "add" or "new" would wrongly count as present.
    if rawget(b, k) then res[k] = true end
  end
  return res
end

-- String representation
Set.mt.__tostring = function (set)
  local s = "{"
  local sep = ""
  for k in pairs(set) do
    s = s .. sep .. tostring(k)
    sep = ", "
  end
  return s .. "}"
end
|
||||||
|
|
||||||
|
|
||||||
|
local ElementNode = {}
ElementNode.mt = {__index = ElementNode}

--- Create a node and attach it to the tree.
-- @param index      ordinal stored on the node
-- @param nameortext element name, or the complete source text when no
--                   `node` is passed (the new node then becomes the root)
-- @param node       reference node; nil/false creates the root
-- @param descend    truthy: the new node becomes a child of `node`;
--                   otherwise it is attached next to `node`
-- @param openstart  start position of the opening tag
-- @param openend    end position of the opening tag
-- @return the new node, with ElementNode.mt as its metatable
function ElementNode:new(index, nameortext, node, descend, openstart, openend)
  local instance = {
    index = index,
    name = nameortext,
    level = 0,
    nodes = {},
    attributes = {},
    classes = {},
    -- until close() is called, the closing positions mirror the opening ones
    _openstart = openstart,
    _openend = openend,
    _closestart = openstart,
    _closeend = openend,
    -- lookup structures filled in by close() of descendant nodes
    deepernodes = Set:new(),
    deeperelements = {},
    deeperattributes = {},
    deeperids = {},
    deeperclasses = {},
  }

  if not node then
    -- root node: owns the text and spans all of it
    instance.name = "root"
    instance.root = instance
    instance._text = nameortext
    local length = string.len(nameortext)
    instance._openstart, instance._openend = 1, length
    instance._closestart, instance._closeend = 1, length
    return setmetatable(instance, ElementNode.mt)
  end

  instance.root = node.root
  if descend then
    -- child of `node`, one level deeper
    instance.parent = node
    instance.level = node.level + 1
    table.insert(node.nodes, instance)
  else
    -- same level as `node`: attach to its parent, or to `node` itself when it has none
    instance.parent = node.parent or node --XXX: adds some safety but needs more testing for heisenbugs in corner cases
    instance.level = node.level
    table.insert((node.parent and node.parent.nodes or node.nodes), instance) --XXX: see above about heisenbugs
  end
  return setmetatable(instance, ElementNode.mt)
end
|
||||||
|
|
||||||
|
--- Source text of this node, from the start of its opening tag to its close end.
function ElementNode:gettext()
  local source = self.root._text
  return string.sub(source, self._openstart, self._closeend)
end

--- Replace the text stored on the root node with `c`.
function ElementNode:settext(c)
  local root = self.root
  root._text = c
end

--- Text of this node with every "<...>" run removed.
function ElementNode:textonly()
  local stripped = self:gettext():gsub("<[^>]*>", "")
  return stripped
end

--- Text strictly between the end of the opening tag and the start of the close.
function ElementNode:getcontent()
  local first = self._openend + 1
  local last = self._closestart - 1
  return string.sub(self.root._text, first, last)
end
|
||||||
|
|
||||||
|
--- Record attribute `k` with value `v` on this node.
-- "id" (compared case-insensitively) is also stored in self.id; "class" is
-- additionally split on whitespace and each token appended to self.classes.
function ElementNode:addattribute(k, v)
  self.attributes[k] = v

  local key = string.lower(k)
  if key == "id" then
    self.id = v
    return
  end

  if key == "class" then
    -- class attribute contains "space-separated tokens", each of which we'd like quick access to
    local classes = self.classes
    for token in string.gmatch(v, "%S+") do
      table.insert(classes, token)
    end
  end
end
|
||||||
|
|
||||||
|
-- Add `node` to the Set stored under `name` in `index`, creating that Set
-- first when the slot is still empty.
local function insert(index, name, node)
  local bucket = index[name]
  if not bucket then
    bucket = Set:new()
    index[name] = bucket
  end
  bucket:add(node)
end
|
||||||
|
|
||||||
|
--- Mark this node as closed and register it with all of its ancestors.
-- @param closestart optional start position of the closing tag
-- @param closeend   optional end position of the closing tag
-- The positions are stored only when both are given. Every ancestor then gets
-- this node added to its deepernodes set and to its per-name, per-attribute,
-- per-id and per-class lookup tables.
function ElementNode:close(closestart, closeend)
  if closestart and closeend then
    self._closestart = closestart
    self._closeend = closeend
  end

  -- inform higher level nodes about this element's existence in their branches
  local ancestor = self.parent
  while ancestor do
    ancestor.deepernodes:add(self)
    insert(ancestor.deeperelements, self.name, self)
    for attribute in pairs(self.attributes) do
      insert(ancestor.deeperattributes, attribute, self)
    end
    if self.id then
      insert(ancestor.deeperids, self.id, self)
    end
    for _, class in ipairs(self.classes) do
      insert(ancestor.deeperclasses, class, self)
    end
    ancestor = ancestor.parent
  end
end
|
||||||
|
|
||||||
|
--- Escape a string for literal use inside a Lua pattern.
-- Every ^, $, (, ), %, ., [, ], *, +, - and ? is prefixed with "%".
-- @param s the literal text
-- @return the escaped text, and nothing else: string.gsub also returns the
--   number of substitutions, and when a call such as
--   string.match(a, escape(v)) ends its argument list with escape(), that
--   count would be taken as the `init` start position. The extra parentheses
--   truncate the result to the single string.
local function escape(s)
  return (string.gsub(s, "([%^%$%(%)%%%.%[%]%*%+%-%?])", "%%" .. "%1"))
end
|
||||||
|
|
||||||
|
local function select(self, s)
|
||||||
|
if not s or type(s) ~= "string" or s == "" then return Set:new() end
|
||||||
|
local sets = {[""] = self.deeperelements, ["["] = self.deeperattributes,
|
||||||
|
["#"] = self.deeperids, ["."] = self.deeperclasses}
|
||||||
|
local function match(t, w)
|
||||||
|
local m, e, v
|
||||||
|
if t == "[" then w, m, e, v = string.match(w,
|
||||||
|
"([^=|%*~%$!%^]+)" .. -- w = 1 or more characters up to a possible "=", "|", "*", "~", "$", "!", or "^"
|
||||||
|
"([|%*~%$!%^]?)" .. -- m = an optional "|", "*", "~", "$", "!", or "^", preceding the optional "="
|
||||||
|
"(=?)" .. -- e = the optional "="
|
||||||
|
"(.*)" -- v = anything following the "=", or else ""
|
||||||
|
)
|
||||||
|
end
|
||||||
|
local matched = Set:new(sets[t][w])
|
||||||
|
-- attribute value selectors
|
||||||
|
if e == "=" then
|
||||||
|
if #v < 2 then v = "'" .. v .. "'" end -- values should be quoted
|
||||||
|
v = string.sub(v, 2, #v - 1) -- strip quotes
|
||||||
|
if m == "!" then matched = Set:new(self.deepernodes) end -- include those without that attribute
|
||||||
|
for node in pairs(matched) do
|
||||||
|
local a = node.attributes[w]
|
||||||
|
-- equals
|
||||||
|
if m == "" and a ~= v then matched:remove(node)
|
||||||
|
-- not equals
|
||||||
|
elseif m == "!" and a == v then matched:remove(node)
|
||||||
|
-- prefix
|
||||||
|
elseif m =="|" and string.match(a, "^[^-]*") ~= v then matched:remove(node)
|
||||||
|
-- contains
|
||||||
|
elseif m =="*" and string.match(a, escape(v)) ~= v then matched:remove(node)
|
||||||
|
-- word
|
||||||
|
elseif m =="~" then matched:remove(node)
|
||||||
|
for word in string.gmatch(a, "%S+") do
|
||||||
|
if word == v then matched:add(node) break end
|
||||||
|
end
|
||||||
|
-- starts with
|
||||||
|
elseif m =="^" and string.match(a, "^" .. escape(v)) ~= v then matched:remove(node)
|
||||||
|
-- ends with
|
||||||
|
elseif m =="$" and string.match(a, escape(v) .. "$") ~= v then matched:remove(node)
|
||||||
|
end
|
||||||
|
end -- for node
|
||||||
|
end -- if v
|
||||||
|
return matched
|
||||||
|
end
|
||||||
|
|
||||||
|
local subjects, resultset, childrenonly = Set:new({self})
|
||||||
|
for part in string.gmatch(s, "%S+") do
|
||||||
|
repeat
|
||||||
|
if part == ">" then childrenonly = true --[[goto nextpart]] break end
|
||||||
|
resultset = Set:new()
|
||||||
|
for subject in pairs(subjects) do
|
||||||
|
local star = subject.deepernodes
|
||||||
|
if childrenonly then star = Set:new(subject.nodes) end
|
||||||
|
resultset = resultset + star
|
||||||
|
end
|
||||||
|
childrenonly = false
|
||||||
|
if part == "*" then --[[goto nextpart]] break end
|
||||||
|
local excludes, filter = Set:new()
|
||||||
|
local start, pos = 0, 0
|
||||||
|
while true do
|
||||||
|
local switch, stype, name, eq, quote
|
||||||
|
start, pos, switch, stype, name, eq, quote = string.find(part,
|
||||||
|
"(%(?%)?)" .. -- switch = a possible ( or ) switching the filter on or off
|
||||||
|
"([:%[#.]?)" .. -- stype = a possible :, [, #, or .
|
||||||
|
"([%w-_\\]+)" .. -- name = 1 or more alfanumeric chars (+ hyphen, reverse slash and uderscore)
|
||||||
|
"([|%*~%$!%^]?=?)" .. -- eq = a possible |=, *=, ~=, $=, !=, ^=, or =
|
||||||
|
"(['\"]?)", -- quote = a ' or " delimiting a possible attribute value
|
||||||
|
pos + 1
|
||||||
|
)
|
||||||
|
if not name then break end
|
||||||
|
repeat
|
||||||
|
if ":" == stype then
|
||||||
|
filter = name
|
||||||
|
--[[goto nextname]] break
|
||||||
|
end
|
||||||
|
if ")" == switch then
|
||||||
|
filter = nil
|
||||||
|
end
|
||||||
|
if "[" == stype and "" ~= quote then
|
||||||
|
local value
|
||||||
|
start, pos, value = string.find(part, "(%b" .. quote .. quote .. ")]", pos)
|
||||||
|
name = name .. eq .. value
|
||||||
|
end
|
||||||
|
local matched = match(stype, name)
|
||||||
|
if filter == "not" then
|
||||||
|
excludes = excludes + matched
|
||||||
|
else
|
||||||
|
resultset = resultset * matched
|
||||||
|
end
|
||||||
|
--::nextname::
|
||||||
|
break
|
||||||
|
until true
|
||||||
|
end
|
||||||
|
resultset = resultset - excludes
|
||||||
|
subjects = Set:new(resultset)
|
||||||
|
--::nextpart::
|
||||||
|
break
|
||||||
|
until true
|
||||||
|
end
|
||||||
|
resultset = resultset:tolist()
|
||||||
|
table.sort(resultset, function (a, b) return a.index < b.index end)
|
||||||
|
return resultset
|
||||||
|
end
|
||||||
|
|
||||||
|
-- Method form of the selector: `node:select(s)` forwards to `select` with this
-- node as the first argument and the selector string `s` as the second.
-- NOTE(review): `select` is presumably the file-local selector function defined
-- earlier in this file (shadowing Lua's built-in `select`) -- confirm.
function ElementNode:select(s) return select(self, s) end
|
||||||
|
-- Registers `select` as the `__call` handler on ElementNode.mt.
-- NOTE(review): presumably ElementNode.mt is the metatable given to node
-- instances, which would make `node(s)` equivalent to `node:select(s)` -- confirm.
ElementNode.mt.__call = select
|
||||||
|
|
||||||
|
return ElementNode
|
19
htmlparser/voidelements.lua
Normal file
19
htmlparser/voidelements.lua
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
-- vim: ft=lua ts=2
|
||||||
|
-- Set of element names treated as void: each listed name maps to `true`.
local void_elements = {}

for _, tag_name in ipairs({
  "area", "base", "br", "col", "command", "embed", "hr", "img",
  "input", "keygen", "link", "meta", "param", "source", "track", "wbr",
}) do
  void_elements[tag_name] = true
end

return void_elements
|
388
json.lua
Normal file
388
json.lua
Normal file
@ -0,0 +1,388 @@
|
|||||||
|
--
|
||||||
|
-- json.lua
|
||||||
|
--
|
||||||
|
-- Copyright (c) 2020 rxi
|
||||||
|
--
|
||||||
|
-- Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||||
|
-- this software and associated documentation files (the "Software"), to deal in
|
||||||
|
-- the Software without restriction, including without limitation the rights to
|
||||||
|
-- use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||||
|
-- of the Software, and to permit persons to whom the Software is furnished to do
|
||||||
|
-- so, subject to the following conditions:
|
||||||
|
--
|
||||||
|
-- The above copyright notice and this permission notice shall be included in all
|
||||||
|
-- copies or substantial portions of the Software.
|
||||||
|
--
|
||||||
|
-- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
-- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
-- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
-- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
-- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
-- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
-- SOFTWARE.
|
||||||
|
--
|
||||||
|
|
||||||
|
local json = { _version = "0.1.2" }
|
||||||
|
|
||||||
|
-------------------------------------------------------------------------------
|
||||||
|
-- Encode
|
||||||
|
-------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
local encode
|
||||||
|
|
||||||
|
-- Characters that have a short JSON escape, mapped to the letter that follows
-- the backslash in the encoded form.
local escape_char_map = {
  ["\\"] = "\\",
  ["\""] = "\"",
  ["\b"] = "b",
  ["\f"] = "f",
  ["\n"] = "n",
  ["\r"] = "r",
  ["\t"] = "t",
}

-- Reverse lookup used when decoding: escape letter -> literal character.
-- "/" is seeded by hand because it is accepted as an escape when decoding but
-- has no entry in escape_char_map.
local escape_char_map_inv = { ["/"] = "/" }
for literal, letter in pairs(escape_char_map) do
  escape_char_map_inv[letter] = literal
end
|
||||||
|
|
||||||
|
|
||||||
|
-- Returns the JSON escape sequence for the single character `c`: the short
-- form from escape_char_map when one exists, otherwise a \uXXXX escape built
-- from the character's byte value.
local function escape_char(c)
  local short_form = escape_char_map[c]
  if short_form then
    return "\\" .. short_form
  end
  return "\\" .. string.format("u%04x", c:byte())
end
|
||||||
|
|
||||||
|
|
||||||
|
-- Encoder for nil values; the argument is ignored and the result is always
-- the JSON literal "null".
local function encode_nil(val)
  local json_null = "null"
  return json_null
end
|
||||||
|
|
||||||
|
|
||||||
|
-- Encodes a table as a JSON array or a JSON object.
--   val:   the table to encode
--   stack: set of tables currently being encoded (used to detect cycles);
--          created on the outermost call
-- A table is treated as an array when it has a value at index 1 or is empty;
-- otherwise it is treated as an object and every key must be a string.
-- Raises on circular references, mixed/invalid key types and sparse arrays.
local function encode_table(val, stack)
  stack = stack or {}

  if stack[val] then
    error("circular reference")
  end
  stack[val] = true

  local pieces = {}
  local looks_like_array = rawget(val, 1) ~= nil or next(val) == nil

  if looks_like_array then
    -- Every key must be a number, and the key count must equal the length,
    -- otherwise the array has holes or extra keys.
    local key_count = 0
    for key in pairs(val) do
      if type(key) ~= "number" then
        error("invalid table: mixed or invalid key types")
      end
      key_count = key_count + 1
    end
    if key_count ~= #val then
      error("invalid table: sparse array")
    end

    for _, item in ipairs(val) do
      table.insert(pieces, encode(item, stack))
    end
    stack[val] = nil
    return "[" .. table.concat(pieces, ",") .. "]"
  end

  for key, item in pairs(val) do
    if type(key) ~= "string" then
      error("invalid table: mixed or invalid key types")
    end
    table.insert(pieces, encode(key, stack) .. ":" .. encode(item, stack))
  end
  stack[val] = nil
  return "{" .. table.concat(pieces, ",") .. "}"
end
|
||||||
|
|
||||||
|
|
||||||
|
-- Encodes a string as a double-quoted JSON string, passing NUL, bytes 1-31,
-- backslash and double quote through escape_char.
local function encode_string(val)
  -- Assigning to a single local keeps only gsub's first result (the string).
  local escaped = val:gsub('[%z\1-\31\\"]', escape_char)
  return '"' .. escaped .. '"'
end
|
||||||
|
|
||||||
|
|
||||||
|
-- Encodes a number using the "%.14g" format.
-- Raises for NaN and for positive or negative infinity.
local function encode_number(val)
  local is_nan = val ~= val
  local is_infinite = val <= -math.huge or val >= math.huge
  if is_nan or is_infinite then
    error("unexpected number value '" .. tostring(val) .. "'")
  end
  return string.format("%.14g", val)
end
|
||||||
|
|
||||||
|
|
||||||
|
-- Dispatch table: Lua type name -> function that encodes a value of that type.
local type_func_map = {
  ["nil"] = encode_nil,
  ["table"] = encode_table,
  ["string"] = encode_string,
  ["number"] = encode_number,
  ["boolean"] = tostring,
}

-- Encodes any supported value by dispatching on its type.
--   val:   value to encode
--   stack: cycle-detection set, forwarded to the type-specific encoder
-- Raises for types that have no entry in type_func_map.
encode = function(val, stack)
  local value_type = type(val)
  local encoder = type_func_map[value_type]
  if not encoder then
    error("unexpected type '" .. value_type .. "'")
  end
  return encoder(val, stack)
end
|
||||||
|
|
||||||
|
|
||||||
|
-- Public entry point: returns the JSON text for `val` as a single value.
function json.encode(val)
  local encoded = encode(val)
  return encoded
end
|
||||||
|
|
||||||
|
|
||||||
|
-------------------------------------------------------------------------------
|
||||||
|
-- Decode
|
||||||
|
-------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
local parse
|
||||||
|
|
||||||
|
-- Builds a set from the arguments: returns a table in which every argument is
-- a key mapped to `true`.
local function create_set(...)
  local members = {}
  local argument_count = select("#", ...)
  for position = 1, argument_count do
    -- Assigning to one local keeps only the argument at `position`.
    local member = select(position, ...)
    members[member] = true
  end
  return members
end
|
||||||
|
|
||||||
|
-- Whitespace characters skipped between tokens.
local space_chars = create_set(" ", "\t", "\r", "\n")

-- Characters that terminate a number or literal token.
local delim_chars = create_set(" ", "\t", "\r", "\n", "]", "}", ",")

-- Characters accepted directly after a backslash inside a string.
local escape_chars = create_set("\\", "/", '"', "b", "f", "n", "r", "t", "u")

-- Recognised bare-word literals.
local literals = create_set("true", "false", "null")

-- Lua value for each literal; "null" maps to nil, so that entry is absent
-- from the table.
local literal_map = {
  ["true"] = true,
  ["false"] = false,
  ["null"] = nil,
}
|
||||||
|
|
||||||
|
|
||||||
|
-- Scans `str` from index `idx` and returns the first index whose character's
-- entry in `set` differs from `negate`.
--   negate = nil  -> first character that IS in the set
--   negate = true -> first character that is NOT in the set
-- Returns #str + 1 when no such character exists.
local function next_char(str, idx, set, negate)
  local length = #str
  local position = idx
  while position <= length do
    if set[str:sub(position, position)] ~= negate then
      return position
    end
    position = position + 1
  end
  return length + 1
end
|
||||||
|
|
||||||
|
|
||||||
|
-- Raises a decode error of the form "<msg> at line L col C", where L and C are
-- the 1-based line and column of index `idx` within `str`.
local function decode_error(str, idx, msg)
  local line, column = 1, 1
  for position = 1, idx - 1 do
    if str:sub(position, position) == "\n" then
      -- A newline starts the next line at column 1.
      line = line + 1
      column = 1
    else
      column = column + 1
    end
  end
  error(string.format("%s at line %d col %d", msg, line, column))
end
|
||||||
|
|
||||||
|
|
||||||
|
-- Converts Unicode codepoint `n` to its UTF-8 byte sequence (1 to 4 bytes).
-- Raises for codepoints above 0x10ffff.
-- http://scripts.sil.org/cms/scripts/page.php?site_id=nrsi&id=iws-appendixa
local function codepoint_to_utf8(n)
  local floor = math.floor

  if n <= 0x7f then
    return string.char(n)
  end

  if n <= 0x7ff then
    local lead = floor(n / 64) + 192
    return string.char(lead, n % 64 + 128)
  end

  if n <= 0xffff then
    local lead = floor(n / 4096) + 224
    local middle = floor(n % 4096 / 64) + 128
    return string.char(lead, middle, n % 64 + 128)
  end

  if n <= 0x10ffff then
    local lead = floor(n / 262144) + 240
    local second = floor(n % 262144 / 4096) + 128
    local third = floor(n % 4096 / 64) + 128
    return string.char(lead, second, third, n % 64 + 128)
  end

  error(string.format("invalid unicode codepoint '%x'", n))
end
|
||||||
|
|
||||||
|
|
||||||
|
-- Converts the hex payload of a \u escape to UTF-8.
--   s: either "XXXX" (one code unit) or "XXXX\uYYYY" (two code units, combined
--      as a surrogate pair into one codepoint)
local function parse_unicode_escape(s)
  local first_unit = tonumber(s:sub(1, 4), 16)
  -- Characters 7-10 hold the second code unit when one is present; otherwise
  -- the substring is empty and tonumber yields nil.
  local second_unit = tonumber(s:sub(7, 10), 16)

  if not second_unit then
    return codepoint_to_utf8(first_unit)
  end

  local combined = (first_unit - 0xd800) * 0x400 + (second_unit - 0xdc00) + 0x10000
  return codepoint_to_utf8(combined)
end
|
||||||
|
|
||||||
|
|
||||||
|
-- Parses the JSON string whose opening quote is at index `i` of `str`.
-- Returns the decoded string and the index just past the closing quote.
-- Raises (via decode_error) on control characters, invalid escapes and a
-- missing closing quote.
local function parse_string(str, i)
  -- Decoded pieces are collected here and joined once at the end; appending to
  -- a growing string on every escape would re-copy the accumulated text.
  local pieces = {}
  local piece_count = 0
  local j = i + 1 -- index currently being examined
  local k = j     -- start of the pending run of unescaped characters

  while j <= #str do
    local x = str:byte(j)

    if x < 32 then
      decode_error(str, j, "control character in string")

    elseif x == 92 then -- `\`: Escape
      piece_count = piece_count + 1
      pieces[piece_count] = str:sub(k, j - 1)
      j = j + 1
      local c = str:sub(j, j)
      local decoded
      if c == "u" then
        -- Try the two-code-unit (surrogate pair) form first, then one unit.
        local hex = str:match("^[dD][89aAbB]%x%x\\u%x%x%x%x", j + 1)
                 or str:match("^%x%x%x%x", j + 1)
                 or decode_error(str, j - 1, "invalid unicode escape in string")
        decoded = parse_unicode_escape(hex)
        j = j + #hex
      else
        if not escape_chars[c] then
          decode_error(str, j - 1, "invalid escape char '" .. c .. "' in string")
        end
        decoded = escape_char_map_inv[c]
      end
      piece_count = piece_count + 1
      pieces[piece_count] = decoded
      k = j + 1

    elseif x == 34 then -- `"`: End of string
      piece_count = piece_count + 1
      pieces[piece_count] = str:sub(k, j - 1)
      return table.concat(pieces), j + 1
    end

    j = j + 1
  end

  decode_error(str, i, "expected closing quote for string")
end
|
||||||
|
|
||||||
|
|
||||||
|
-- Parses the number token starting at index `i`; the token runs up to the next
-- delimiter character. Returns the number and the index of that delimiter.
-- Raises (via decode_error) when the token is not a valid number.
local function parse_number(str, i)
  local token_end = next_char(str, i, delim_chars)
  local token = str:sub(i, token_end - 1)
  local value = tonumber(token)
  if value then
    return value, token_end
  end
  decode_error(str, i, "invalid number '" .. token .. "'")
  return value, token_end
end
|
||||||
|
|
||||||
|
|
||||||
|
-- Parses a bare-word literal (true / false / null) starting at index `i`.
-- Returns the corresponding Lua value (nil for null) and the index of the
-- delimiter that ended the word. Raises (via decode_error) for unknown words.
local function parse_literal(str, i)
  local word_end = next_char(str, i, delim_chars)
  local word = str:sub(i, word_end - 1)
  if literals[word] then
    return literal_map[word], word_end
  end
  decode_error(str, i, "invalid literal '" .. word .. "'")
  return literal_map[word], word_end
end
|
||||||
|
|
||||||
|
|
||||||
|
-- Parses the JSON array whose "[" is at index `i` of `str`.
-- Returns the resulting table and the index just past the closing "]".
-- Raises (via decode_error) when an element is not followed by "," or "]".
local function parse_array(str, i)
  local res = {}
  local n = 1
  i = i + 1
  while 1 do
    local x
    i = next_char(str, i, space_chars, true)
    -- Empty / end of array?
    if str:sub(i, i) == "]" then
      i = i + 1
      break
    end
    -- Read token
    x, i = parse(str, i)
    res[n] = x
    n = n + 1
    -- Next token
    i = next_char(str, i, space_chars, true)
    local chr = str:sub(i, i)
    -- Report the error before advancing so the position names the offending
    -- character itself, as the other decode_error call sites do.
    if chr ~= "]" and chr ~= "," then
      decode_error(str, i, "expected ']' or ','")
    end
    i = i + 1
    if chr == "]" then break end
  end
  return res, i
end
|
||||||
|
|
||||||
|
|
||||||
|
-- Parses the JSON object whose "{" is at index `i` of `str`.
-- Returns the resulting table and the index just past the closing "}".
-- Raises (via decode_error) when a key is not a string, when ":" is missing
-- after a key, or when a member is not followed by "," or "}".
local function parse_object(str, i)
  local res = {}
  i = i + 1
  while 1 do
    local key, val
    i = next_char(str, i, space_chars, true)
    -- Empty / end of object?
    if str:sub(i, i) == "}" then
      i = i + 1
      break
    end
    -- Read key
    if str:sub(i, i) ~= '"' then
      decode_error(str, i, "expected string for key")
    end
    key, i = parse(str, i)
    -- Read ':' delimiter
    i = next_char(str, i, space_chars, true)
    if str:sub(i, i) ~= ":" then
      decode_error(str, i, "expected ':' after key")
    end
    i = next_char(str, i + 1, space_chars, true)
    -- Read value
    val, i = parse(str, i)
    -- Set
    res[key] = val
    -- Next token
    i = next_char(str, i, space_chars, true)
    local chr = str:sub(i, i)
    -- Report the error before advancing so the position names the offending
    -- character itself, as the other decode_error call sites do.
    if chr ~= "}" and chr ~= "," then
      decode_error(str, i, "expected '}' or ','")
    end
    i = i + 1
    if chr == "}" then break end
  end
  return res, i
end
|
||||||
|
|
||||||
|
|
||||||
|
-- Dispatch table: first character of a value -> parser for that kind of value.
local char_func_map = {
  ['"'] = parse_string,
  ["-"] = parse_number,
  ["t"] = parse_literal,
  ["f"] = parse_literal,
  ["n"] = parse_literal,
  ["["] = parse_array,
  ["{"] = parse_object,
}
-- Every decimal digit starts a number.
for digit = 0, 9 do
  char_func_map[tostring(digit)] = parse_number
end

-- Parses the value starting at index `idx` of `str` by dispatching on its
-- first character. Returns whatever the selected parser returns (the value and
-- the index following it). Raises (via decode_error) when no parser is
-- registered for that character.
parse = function(str, idx)
  local first_char = str:sub(idx, idx)
  local parser = char_func_map[first_char]
  if not parser then
    decode_error(str, idx, "unexpected character '" .. first_char .. "'")
    return
  end
  return parser(str, idx)
end
|
||||||
|
|
||||||
|
|
||||||
|
-- Public entry point: decodes the JSON text `str` and returns the Lua value.
-- Raises when `str` is not a string, when the text is malformed, or when
-- anything other than whitespace follows the first value.
function json.decode(str)
  local argument_type = type(str)
  if argument_type ~= "string" then
    error("expected argument of type string, got " .. argument_type)
  end

  local value_start = next_char(str, 1, space_chars, true)
  local res, idx = parse(str, value_start)

  -- Only whitespace may remain after the value.
  idx = next_char(str, idx, space_chars, true)
  if idx <= #str then
    decode_error(str, idx, "trailing garbage")
  end
  return res
end
|
||||||
|
|
||||||
|
|
||||||
|
return json
|
370
make-epub.lua
Executable file
370
make-epub.lua
Executable file
@ -0,0 +1,370 @@
|
|||||||
|
#!/usr/bin/env luajit
|
||||||
|
|
||||||
|
local help = [[Usage:
|
||||||
|
|
||||||
|
make-epub.lua <config (JSON file)> [action]
|
||||||
|
|
||||||
|
If "." is used instead of a JSON file, every JSON file in the current directory
|
||||||
|
will be used to make multiple ebooks back-to-back.
|
||||||
|
|
||||||
|
[action]: If not specified, all steps will be taken in order (except cleanall).
|
||||||
|
download: All pages will be downloaded to their own HTML files.
|
||||||
|
convert: Each page is converted to Markdown.
|
||||||
|
concat: A file is created for each section out of its pages.
|
||||||
|
markdown: Metadata frontmatter and Markdown section files will be
|
||||||
|
concatenated into a single Markdown file.
|
||||||
|
epub: Markdown file will be converted to an ePub using pandoc.
|
||||||
|
cleanpage: All page files will be deleted, along with their extra
|
||||||
|
directories.
|
||||||
|
cleanall: Deletes everything except the config file and ePub.
|
||||||
|
|
||||||
|
Requirements:
|
||||||
|
- Binaries: pandoc, curl
|
||||||
|
|
||||||
|
For how to write a configuration and examples, see the .lua-files README:
|
||||||
|
https://github.com/TangentFoxy/.lua-files#make-epublua
|
||||||
|
]]
|
||||||
|
|
||||||
|
-- Load utility-functions.lua from the directory this script lives in.
-- The directory is taken from arg[0] (everything up to the last "/" or "\").
-- When arg[0] has no directory part (the script was started from its own
-- directory) neither pattern matches, so fall back to "" and load the file
-- relative to the current directory instead of failing on a nil concatenation.
local success, utility = pcall(function()
  local script_directory = arg[0]:match("@?(.*/)") or arg[0]:match("@?(.*\\)") or ""
  return dofile(script_directory .. "utility-functions.lua")
end)
if not success then
  -- On failure the second pcall result is the error; show it before the hint.
  print("\n\n" .. tostring(utility))
  error("\n\nThis script may be installed improperly. Follow instructions at:\n\thttps://github.com/TangentFoxy/.lua-files#installation\n")
end
|
||||||
|
|
||||||
|
local path_separator = utility.path_separator
|
||||||
|
local copyright_warning = "This ebook was created by an automated tool for personal use. It cannot be distributed or sold without permission of copyright holder(s). (If you did not make this ebook, you may be infringing.)\n\n"
|
||||||
|
|
||||||
|
-- Decodes the JSON config text, fills in defaults, validates it, and returns
-- the resulting config table.
--   config_file_text: raw contents of the JSON config file
-- Raises when required items (sections, page_counts, sections.finish) are
-- missing or when page_counts / section_titles do not match the section count.
-- TODO make it check for ALL required elements and error if any are missing!
local function load_config(config_file_text)
  local json = utility.require("json")

  -- `local` is required here: without it the decoded table leaks into a global.
  local config = json.decode(config_file_text)
  if type(config) ~= "table" then
    error("Config file must contain a JSON object.")
  end
  config.config_file_text = config_file_text -- kept so it can be embedded in the ebook

  if not config.authors then
    config.authors = {} -- at least have an empty table so it doesn't error below TODO verify that this is actually true
  end

  if not config.keywords then
    config.keywords = {} -- TODO test if it will work empty
  end

  if config.author then -- old style single author will be prepended to authors list
    table.insert(config.authors, 1, config.author)
  end

  -- if only using a single section
  if config.first_section_url and not config.base_url then
    config.base_url = config.first_section_url -- prevent errors due to required item being missing
  end

  -- Both tables are indexed unconditionally below; fail with a clear message
  -- instead of an "attempt to index a nil value" error.
  if type(config.sections) ~= "table" then
    error("Config is missing required item: sections")
  end
  if type(config.page_counts) ~= "table" then
    error("Config is missing required item: page_counts")
  end

  -- detecting manually specified sections and flagging it to the rest of the script
  if config.sections[1] then
    config.sections.start = 1
    config.sections.finish = #config.sections
    config.manually_specified_sections = true -- decided to make this part of the config spec, but it's set here again just in case
    config.base_url = "http://example.com/" -- must be defined to prevent errors; it will be manipulated and ignored in this use case
  end

  if not config.sections.start then
    config.sections.start = 1 -- the first one can be optional since the common use case is ALL OF THEM
  end

  if not config.sections.finish then
    error("Config is missing required item: sections.finish")
  end

  local section_count = config.sections.finish - config.sections.start + 1

  if #config.page_counts ~= section_count then
    error("Number of page_counts does not match number of sections.")
  end

  if config.section_titles and #config.section_titles ~= section_count then
    error("Number of section_titles does not match number of sections.")
  end

  local base_file_name
  if config.title and config.authors[1] then
    -- first author in list gets top billing (this is problematic in anthologies unless an editor is the first entry)
    base_file_name = config.title .. " by " .. config.authors[1]
  elseif config.title then
    base_file_name = config.title
  else
    base_file_name = "Book"
  end
  -- An explicit base_file_name in the config wins over the derived one.
  config.base_file_name = utility.make_safe_file_name(config.base_file_name or base_file_name)

  return config
end
|
||||||
|
|
||||||
|
-- Builds the frontmatter block (delimited by "---" lines) written at the top of
-- the Markdown file: title, author list, and the keyword list emitted under
-- both "keywords" and "tags".
--   config: config table; reads title, authors and keywords
-- Returns the frontmatter as one string ending in a blank line.
local function format_metadata(config)
  -- Quotes every item and joins them with ", ". An empty list yields "", so
  -- empty authors/keywords produce "[]" instead of passing nil to
  -- utility.escape_quotes.
  local function stringify_list(list)
    local quoted = {}
    for i = 1, #list do
      quoted[i] = utility.escape_quotes(list[i]):enquote()
    end
    return table.concat(quoted, ", ")
  end

  local keywords_string = stringify_list(config.keywords)
  local metadata = {
    "---",
    "title: " .. utility.escape_quotes(config.title):enquote(),
    "author: [" .. stringify_list(config.authors) .. "]",
    "keywords: [" .. keywords_string .. "]",
    "tags: [" .. keywords_string .. "]",
    "---",
    "",
  }

  return table.concat(metadata, "\n") .. "\n"
end
|
||||||
|
|
||||||
|
-- Downloads every page of every section with curl and writes the extracted
-- article HTML to "<base_file_name>/<section>/<page>.html".
--   config: config table returned by load_config
-- Raises when a downloaded page does not contain the expected elements.
local function download_pages(config)
  local htmlparser = utility.require("htmlparser")
  utility.required_program("curl")
  local working_dir = config.base_file_name

  os.execute("mkdir " .. working_dir:enquote())
  for section = config.sections.start, config.sections.finish do
    local section_dir = working_dir .. path_separator .. tostring(section) .. path_separator
    os.execute("mkdir " .. section_dir:sub(1, -2):enquote()) -- sub() drops the trailing separator

    local section_url
    if section == 1 and config.first_section_url then
      section_url = config.first_section_url
    else
      section_url = config.base_url .. string.format("%02i", section) -- leftpad 2 (This will eventually cause problems.)
    end

    -- Manually listed section URLs override the generated one.
    if config.manually_specified_sections then
      section_url = config.sections[section]
    end

    -- page_counts is indexed relative to the first section, not by section number
    for page = 1, config.page_counts[section - (config.sections.start - 1)] do
      local download_url
      if page == 1 then
        download_url = section_url
      else
        download_url = section_url .. "?page=" .. tostring(page)
      end

      local temporary_html_file_name = utility.tmp_file_name()
      os.execute("curl " .. download_url:enquote() .. " > " .. temporary_html_file_name)

      utility.open(temporary_html_file_name, "r", "Could not download " .. download_url:enquote())(function(html_file)
        local raw_html = html_file:read("*all")

        local parser = htmlparser.parse(raw_html)
        local content_tag = parser:select(".article > div > div") -- TODO add ability to set selector in config!
        if not content_tag[1] then
          -- Name the page instead of failing with a bare nil-index error.
          error("Could not find page content in " .. download_url:enquote())
        end
        local text = content_tag[1]:getcontent()

        if page == 1 and config.extract_titles then
          local title_tag = parser:select(".headline")
          if not title_tag[1] then
            error("Could not find a title in " .. download_url:enquote())
          end
          text = title_tag[1]:gettext() .. text
        end

        utility.open(section_dir .. page .. ".html", "w")(function(page_file)
          page_file:write(text .. "\n")
        end)
      end)

      -- os.remove works on every platform; a shell "rm" does not exist in a
      -- plain Windows command prompt.
      os.remove(temporary_html_file_name)
      -- NOTE(review): "sleep" is a shell command too and is presumably missing
      -- on plain Windows -- confirm and replace if Windows must be supported.
      os.execute("sleep " .. tostring(math.random(5))) -- avoid rate limiting
    end
  end
end
|
||||||
|
|
||||||
|
-- Converts every downloaded "<page>.html" file to "<page>.md" using pandoc.
--   config: config table returned by load_config
local function convert_pages(config)
  utility.required_program("pandoc")

  local first_section = config.sections.start
  local last_section = config.sections.finish

  for section = first_section, last_section do
    local section_dir = config.base_file_name .. path_separator .. tostring(section) .. path_separator
    -- page_counts is indexed relative to the first section
    local page_count = config.page_counts[section - (first_section - 1)]

    for page = 1, page_count do
      local page_path = section_dir .. page
      local html_argument = (page_path .. ".html"):enquote()
      local markdown_argument = (page_path .. ".md"):enquote()
      os.execute("pandoc --from html --to markdown " .. html_argument .. " -o " .. markdown_argument)
    end
  end
end
|
||||||
|
|
||||||
|
-- Joins the Markdown pages of each section into "<base_file_name>/<section>.md".
-- When config.sections.automatic_naming is set, lines matching one of the
-- naming patterns are prefixed with "# ".
--   config: config table returned by load_config
local function concatenate_pages(config)
  local working_dir = config.base_file_name
  local naming_patterns = {
    "^Prologue$",
    "^Chapter %d+$",
    "^%*%*CHAPTER ",
  }

  -- Copies one page into the section file, adding "# " prefixes if requested.
  local function append_page(page_file, section_file)
    if config.sections.automatic_naming then
      for line in page_file:lines() do
        for _, pattern in ipairs(naming_patterns) do
          if line:find(pattern) then
            line = "# " .. line
          end
        end
        section_file:write(line .. "\n")
      end
    else
      section_file:write(page_file:read("*all"))
    end
    section_file:write("\n") -- guarantees no accidental line collisions
  end

  for section = config.sections.start, config.sections.finish do
    local section_name = tostring(section)
    local section_dir = working_dir .. path_separator .. section_name .. path_separator
    local section_path = working_dir .. path_separator .. section_name .. ".md"

    utility.open(section_path, "w")(function(section_file)
      local page_count = config.page_counts[section - (config.sections.start - 1)]
      for page = 1, page_count do
        utility.open(section_dir .. page .. ".md", "r")(function(page_file)
          append_page(page_file, section_file)
        end)
      end
    end)
  end
end
|
||||||
|
|
||||||
|
-- Writes "<base_file_name>.md": frontmatter, the copyright warning, every
-- section (with an optional heading), and a closing section that embeds the
-- config the ebook was made from.
--   config: config table returned by load_config
local function write_markdown_file(config)
  local working_dir = config.base_file_name

  utility.open(config.base_file_name .. ".md", "w")(function(markdown_file)
    markdown_file:write(format_metadata(config))
    markdown_file:write(copyright_warning)

    for section = config.sections.start, config.sections.finish do
      if config.sections.naming then
        markdown_file:write("\n\n# " .. config.sections.naming .. " " .. tostring(section))
      elseif config.section_titles then
        -- section_titles holds one entry per section starting at index 1 (its
        -- length is validated against finish - start + 1), so it must be
        -- indexed relative to the first section, exactly like page_counts.
        local title_index = section - (config.sections.start - 1)
        markdown_file:write("\n\n# " .. config.section_titles[title_index])
      elseif config.lazy_titling then
        local section_url
        if section == 1 and config.first_section_url then
          section_url = config.first_section_url
        else
          section_url = config.base_url
        end
        if config.manually_specified_sections then
          section_url = config.sections[section]
        end

        -- Derive a title from the URL: skip the first 29 characters, split on
        -- "-", drop trailing numeric parts and a trailing "ch"/"pt", then
        -- capitalize the first letter of each remaining part.
        -- NOTE(review): the offset 30 presumably skips a fixed-length URL
        -- prefix of the target site -- confirm.
        local title_parts = section_url:sub(30):gsplit("-")
        while tonumber(title_parts[#title_parts]) do
          title_parts[#title_parts] = nil
        end
        local last_part = title_parts[#title_parts]
        if last_part == "ch" or last_part == "pt" then
          title_parts[#title_parts] = nil
        end
        for index, part in ipairs(title_parts) do
          title_parts[index] = part:sub(1, 1):upper() .. part:sub(2)
        end
        markdown_file:write("\n\n# " .. table.concat(title_parts, " "))
      end
      markdown_file:write("\n\n")

      local section_file_name = working_dir .. path_separator .. tostring(section)
      utility.open(section_file_name .. ".md", "r")(function(section_file)
        markdown_file:write(section_file:read("*all"))
      end)
    end

    markdown_file:write("\n\n# Ebook Creation Metadata\n\n")
    markdown_file:write(copyright_warning)
    markdown_file:write("This ebook was created using the following config:\n\n")
    markdown_file:write("```json\n" .. config.config_file_text .. "\n```\n")
  end)
end
|
||||||
|
|
||||||
|
-- Converts "<base_file_name>.md" to "All ePubs/<base_file_name>.epub" using
-- pandoc, creating the output directory first.
--   config: config table returned by load_config
local function make_epub(config)
  utility.required_program("pandoc")

  local output_dir = "All ePubs"
  os.execute("mkdir " .. output_dir:enquote())

  local source_argument = (config.base_file_name .. ".md"):enquote()
  local target_argument = (output_dir .. path_separator .. config.base_file_name .. ".epub"):enquote()

  local command = table.concat({
    "pandoc --from markdown --to epub",
    source_argument,
    "-o",
    target_argument,
    "--toc=true",
  }, " ")
  os.execute(command)
end
|
||||||
|
|
||||||
|
-- Deletes the per-section page directories "<base_file_name>/<section>" using
-- the platform's recursive remove command.
--   config: config table returned by load_config
local function rm_page_files(config)
  local remove_command = utility.recursive_remove_command
  local section_prefix = config.base_file_name .. path_separator

  for section = config.sections.start, config.sections.finish do
    local section_dir = section_prefix .. tostring(section)
    os.execute(remove_command .. section_dir:enquote())
  end
end
|
||||||
|
|
||||||
|
-- Deletes the working directory "<base_file_name>" and the combined Markdown
-- file "<base_file_name>.md".
--   config: config table returned by load_config
local function rm_all(config)
  local working_dir = config.base_file_name

  os.execute(utility.recursive_remove_command .. working_dir:enquote())
  -- os.remove works on every platform; a shell "rm" does not exist in a plain
  -- Windows command prompt.
  os.remove(config.base_file_name .. ".md")
end
|
||||||
|
|
||||||
|
-- Maps positional command-line arguments to names.
--   arguments:            list of argument strings
--   positional_arguments: list of names; name N receives argument N
-- Returns a table { name = argument, ... }. Arguments beyond the named
-- positions are ignored. If any argument is a help flag, the usage text is
-- printed and nil is returned instead.
local function argparse(arguments, positional_arguments)
  local help_flags = { "-h", "--help", "/?", "/help", "help" }
  local recognized_arguments = {}

  for index, argument in ipairs(arguments) do
    -- The loop variable must not be called `help`: that would shadow the usage
    -- text and print the matched flag instead of the usage.
    for _, help_flag in ipairs(help_flags) do
      if argument == help_flag then
        print(help)
        return nil
      end
    end
    if positional_arguments[index] then
      recognized_arguments[positional_arguments[index]] = argument
    end
  end

  return recognized_arguments
end
|
||||||
|
|
||||||
|
-- Loads the config named by arguments.json_file_name and runs either the one
-- requested action or, when no action is given, the whole pipeline.
--   arguments: table with json_file_name and optional action
-- Prints the usage text and raises when the action is not recognized.
local function main(arguments)
  local config = utility.open(arguments.json_file_name, "r")(function(config_file)
    return load_config(config_file:read("*all"))
  end)

  local actions = {
    download = download_pages,
    convert = convert_pages,
    concat = concatenate_pages,
    markdown = write_markdown_file,
    epub = make_epub,
    cleanpage = rm_page_files,
    cleanall = rm_all,
  }

  local requested = arguments.action
  if requested then
    local action = actions[requested]
    if not action then
      print(help)
      error("\nInvalid action specified.")
    end
    action(config)
    return
  end

  -- No action requested: run every step in order, announcing each one.
  local pipeline = {
    { "\nDownloading pages...\n", download_pages },
    { "\nConverting pages...\n", convert_pages },
    { "\nConcatenating pages...\n", concatenate_pages },
    { "\nRemoving page files...\n", rm_page_files },
    { "\nWriting Markdown file...\n", write_markdown_file },
    { "\nMaking ePub...\n", make_epub },
  }
  for _, step in ipairs(pipeline) do
    print(step[1])
    step[2](config)
  end
  print("\nDone!\n")
end
|
||||||
|
|
||||||
|
-- Script entry point: parse the command line, then build one ebook, or one per
-- JSON file in the current directory when "." is given as the config.
local positional_arguments = {"json_file_name", "action"}
local arguments = argparse(arg, positional_arguments)

if not arguments then
  -- argparse returns nil after handling a help flag; there is nothing to do.
  return
end

if not arguments.json_file_name then
  print(help)
  error("\nA config file name/path must be specified.")
end

if arguments.json_file_name == "." then
  utility.ls(".")(function(file_name)
    -- "%." matches a literal dot; a bare "." would match any character and
    -- accept names such as "notjson".
    if file_name:find("%.json$") then
      arguments.json_file_name = file_name
      main(arguments)
    end
  end)
else
  main(arguments)
end
|
62
test.lua
62
test.lua
@ -1,51 +1,21 @@
|
|||||||
#!/usr/bin/env luajit
|
#!/usr/bin/env luajit
|
||||||
|
|
||||||
-- if utility-functions.lua has an error, this won't show it, so for testing purposes, I don't use it here
|
local success, utility = pcall(function()
|
||||||
-- local error_occurred, utility = pcall(function() return dofile(arg[0]:match("@?(.*/)") or arg[0]:match("@?(.*\\)") .. "utility-functions.lua") end) if not error_occurred then error("\n\nThis script is installed improperly. Follow instructions at:\n\thttps://github.com/TangentFoxy/.lua-files#installation\n") end
|
return dofile((arg[0]:match("@?(.*/)") or arg[0]:match("@?(.*\\)")) .. "utility-functions.lua")
|
||||||
utility = dofile(arg[0]:match("@?(.*/)") or arg[0]:match("@?(.*\\)") .. "utility-functions.lua")
|
end)
|
||||||
|
if not success then
|
||||||
print("---")
|
print("\n\n" .. tostring(utility))
|
||||||
|
error("\n\nThis script may be installed improperly. Follow instructions at:\n\thttps://github.com/TangentFoxy/.lua-files#installation\n")
|
||||||
local commands = {
|
|
||||||
"llm run dolphin-mixtral \"How are you?\"",
|
|
||||||
"llm run curt \"How are you?\"",
|
|
||||||
"llm run curt2 \"How are you?\"",
|
|
||||||
"llm run synx \"How are you?\"",
|
|
||||||
"llm run synx \"Describe actions you would take as a synx.\"",
|
|
||||||
"llm run synx \"Describe a synx.\"",
|
|
||||||
"llm run synx \"What are you?\""
|
|
||||||
}
|
|
||||||
|
|
||||||
-- local llm = loadfile(utility.path .. "llm.lua")
|
|
||||||
|
|
||||||
for _, command in ipairs(commands) do
|
|
||||||
-- print("\n\n\nTEST START", command .. "\n\n\n")
|
|
||||||
|
|
||||||
-- print(command:rep(5, "\n"))
|
|
||||||
|
|
||||||
for i = 1, 5 do
|
|
||||||
-- os.execute(command)
|
|
||||||
-- loadfile(utility.path .. "llm.lua")(command:sub(5))
|
|
||||||
|
|
||||||
-- command = command:sub(5)
|
|
||||||
-- local tab = {}
|
|
||||||
-- for argument in command:gmatch("%S+") do
|
|
||||||
-- table.insert(tab, argument)
|
|
||||||
-- end
|
|
||||||
-- llm(unpack(tab))
|
|
||||||
|
|
||||||
-- print("\nOUTPUT ENDS\n")
|
|
||||||
|
|
||||||
-- error("\n\ntmp break\n\n")
|
|
||||||
|
|
||||||
|
|
||||||
-- print(command)
|
|
||||||
os.execute("echo " .. command .. " >> .run-this-shit.ps1")
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
-- os.execute("echo " .. commands[1] .. " >> .run-this-shit.ps1")
|
local htmlparser = utility.require("htmlparser")
|
||||||
os.execute("pwsh .run-this-shit.ps1")
|
utility.open("TEST.html", "r")(function(html_file)
|
||||||
os.execute("rm .run-this-shit.ps1")
|
local raw_html = html_file:read("*all")
|
||||||
|
|
||||||
print("---")
|
local parser = htmlparser.parse(raw_html)
|
||||||
|
local content_tag = parser:select(".article > div > div") -- TODO add ability to set selector in config!
|
||||||
|
local text = content_tag[1]:getcontent()
|
||||||
|
|
||||||
|
local title_tag = parser:select(".headline")
|
||||||
|
print(title_tag[1]:gettext())
|
||||||
|
end)
|
||||||
|
@ -1,14 +1,30 @@
|
|||||||
|
-- TO USE, PUT THE INTERIOR OF THIS FUNCTION IN YOUR FILE
|
||||||
|
-- this only works if that file is in the same directory as this one - but works no matter where it was called from
|
||||||
|
--- Example loader: runs utility-functions.lua from the directory of the running script.
-- Throws (after printing the underlying failure) when the file cannot be loaded.
local function _example_load()
  local success, utility = pcall(function()
    -- arg[0] is the script path as it was invoked; keep everything up to the last
    -- path separator. A script started from its own directory (`lua script.lua`)
    -- has no separator at all, so fall back to "" (the current directory) instead
    -- of concatenating nil, which would raise an unrelated error.
    local script_directory = arg[0]:match("@?(.*/)") or arg[0]:match("@?(.*\\)") or ""
    return dofile(script_directory .. "utility-functions.lua")
  end)
  if not success then
    -- on failure the second pcall result is the error value, not the module
    print("\n\n" .. tostring(utility))
    error("\n\nThis script may be installed improperly. Follow instructions at:\n\thttps://github.com/TangentFoxy/.lua-files#installation\n")
  end
end
|
||||||
|
|
||||||
math.randomseed(os.time())
|
math.randomseed(os.time())
|
||||||
|
|
||||||
local utility = {}
|
local utility = {}
|
||||||
|
|
||||||
if package.config:sub(1, 1) == "\\" then
|
if package.config:sub(1, 1) == "\\" then
|
||||||
utility.OS = "Windows"
|
utility.OS = "Windows"
|
||||||
|
utility.path_separator = "\\"
|
||||||
|
utility.recursive_remove_command = "rmdir /s /q "
|
||||||
else
|
else
|
||||||
utility.OS = "UNIX-like"
|
utility.OS = "UNIX-like"
|
||||||
|
utility.path_separator = "/"
|
||||||
|
utility.recursive_remove_command = "rm -r "
|
||||||
end
|
end
|
||||||
|
|
||||||
utility.path = arg[0]:match("@?(.*/)") or arg[0]:match("@?(.*\\)") -- related to discussion in https://stackoverflow.com/q/6380820
|
utility.path = arg[0]:match("@?(.*/)") or arg[0]:match("@?(.*\\)") -- inspired by discussion in https://stackoverflow.com/q/6380820
|
||||||
|
|
||||||
-- always uses outputting to a temporary file to guarantee safety
|
-- always uses outputting to a temporary file to guarantee safety
|
||||||
function os.capture_safe(command, tmp_file_name)
|
function os.capture_safe(command, tmp_file_name)
|
||||||
@ -39,6 +55,43 @@ function string.trim(s)
|
|||||||
return s:match'^()%s*$' and '' or s:match'^%s*(.*%S)'
|
return s:match'^()%s*$' and '' or s:match'^%s*(.*%S)'
|
||||||
end
|
end
|
||||||
|
|
||||||
|
--- Wraps `s` in double quotes.
-- Quote characters already inside `s` are left untouched (nothing is escaped).
function string.enquote(s)
  local quote = "\""
  return quote .. s .. quote
end
|
||||||
|
|
||||||
|
--- Prefixes every Lua pattern magic character in `s` with `%` so that `s`
-- can be embedded in a pattern and matched literally.
-- Returns nothing when `s` is nil.
local function escape_special_characters(s)
  if s == nil then
    return
  end
  -- storing the result in a single local drops gsub's second return value (the match count)
  local escaped = s:gsub("[()%%.[^$%]*+%-?]", "%%%1")
  return escaped
end
|
||||||
|
|
||||||
|
--- Returns an iterator over the pieces of `s` separated by `delimiter`.
-- `delimiter` defaults to "," and is matched literally (magic characters are escaped).
-- The delimiter is appended to `s` when `s` does not already end with it, so the
-- final piece is always terminated; a trailing delimiter in the input therefore
-- does not produce an extra empty piece.
function string.gsplit(s, delimiter)
  local separator = delimiter or ","
  local subject = s
  if subject:sub(-#separator) ~= separator then
    subject = subject .. separator
  end
  local pattern = "(.-)" .. escape_special_characters(separator)
  return subject:gmatch(pattern)
end
|
||||||
|
|
||||||
|
--- Collects every piece yielded by `s:gsplit(delimiter)` into a new array, in order.
function string.split(s, delimiter)
  local pieces, count = {}, 0
  for piece in s:gsplit(delimiter) do
    count = count + 1
    pieces[count] = piece
  end
  return pieces
end
|
||||||
|
|
||||||
|
--- Loads `<name>.lua` from the directory of the running script and returns its result.
-- Unlike the built-in require, this uses dofile, so the file is executed on every call.
-- Throws (after printing the underlying failure) when the file cannot be loaded.
utility.require = function(name)
  local success, package_or_err = pcall(function()
    -- A script started from its own directory (`lua script.lua`) has no path
    -- separator in arg[0]; fall back to "" (the current directory) instead of
    -- concatenating nil, which would be reported as an installation problem.
    local script_directory = arg[0]:match("@?(.*/)") or arg[0]:match("@?(.*\\)") or ""
    return dofile(script_directory .. name .. ".lua")
  end)
  if not success then
    print("\n\n" .. tostring(package_or_err))
    error("\n\nThis script may be installed improperly. Follow instructions at:\n\thttps://github.com/TangentFoxy/.lua-files#installation\n")
  end
  return package_or_err
end
|
||||||
|
|
||||||
|
-- errors if specified program isn't in the path
|
||||||
utility.required_program = function(name)
|
utility.required_program = function(name)
|
||||||
local command
|
local command
|
||||||
if utility.OS == "Windows" then
|
if utility.OS == "Windows" then
|
||||||
@ -67,6 +120,28 @@ utility.tmp_file_name = function()
|
|||||||
return "." .. utility.uuid() .. ".tmp"
|
return "." .. utility.uuid() .. ".tmp"
|
||||||
end
|
end
|
||||||
|
|
||||||
|
--- Produces a file name with problematic characters removed or replaced.
-- Listed punctuation is deleted, `&` (with an optional leading space) becomes a
-- comma, and each run of whitespace is collapsed to a single space.
utility.make_safe_file_name = function(file_name)
  file_name = file_name:gsub("[%\"%:%\\%!%@%#%$%%%^%*%=%{%}%|%;%<%>%?%/]", "") -- everything except the &
  file_name = file_name:gsub(" %&", ",") -- replacing & with a comma works for 99% of things
  file_name = file_name:gsub("%&", ",") -- replacing & with a comma works for 99% of things
  -- "%s+" matches a whole run of whitespace. The previous "[%s+]" was a character
  -- set (one whitespace character OR a literal "+"), so runs were never collapsed
  -- and every "+" was turned into a space.
  file_name = file_name:gsub("%s+", " ") -- more than one space in succession should be a single space
  return file_name
end
|
||||||
|
|
||||||
|
-- io.open, but errors are immediately thrown, and the file is closed for you
-- Usage: utility.open(file_name, mode)(function(file) ... end)
--   file_name, mode: passed straight to io.open
--   custom_error_message: optional, thrown instead of io.open's message when opening fails
-- Returns a function that takes a callback, calls it with the open file handle,
-- closes the file, and then returns the callback's first result (or re-throws its error).
utility.open = function(file_name, mode, custom_error_message)
  local file, err = io.open(file_name, mode)
  if not file then error(custom_error_message or err) end
  return function(fn)
    local success, result_or_error = pcall(fn, file)
    file:close() -- runs whether or not the callback failed
    if not success then
      -- custom_error_message is only for when the file doesn't exist, this function should not hide *your* errors
      -- level 0 re-throws the callback's error as-is instead of prefixing it with this file's position
      error(result_or_error, 0)
    end
    return result_or_error
  end
end
|
||||||
|
|
||||||
utility.escape_quotes = function(input)
|
utility.escape_quotes = function(input)
|
||||||
-- the order of these commands is important and must be preserved
|
-- the order of these commands is important and must be preserved
|
||||||
input = input:gsub("\\", "\\\\")
|
input = input:gsub("\\", "\\\\")
|
||||||
@ -74,6 +149,7 @@ utility.escape_quotes = function(input)
|
|||||||
return input
|
return input
|
||||||
end
|
end
|
||||||
|
|
||||||
|
-- Example, print all items in this directory: utility.ls(".")(print)
|
||||||
utility.ls = function(path)
|
utility.ls = function(path)
|
||||||
local command
|
local command
|
||||||
if utility.OS == "Windows" then
|
if utility.OS == "Windows" then
|
||||||
|
15
utility-quickref-test.lua
Normal file
15
utility-quickref-test.lua
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
-- Quick manual check of utility-functions.lua: loads it from this script's
-- directory and prints a few of its values.
local success, utility = pcall(function()
  -- A script started from its own directory (`lua utility-quickref-test.lua`) has
  -- no path separator in arg[0]; fall back to "" (the current directory) instead
  -- of concatenating nil, which would be reported as an installation problem.
  local script_directory = arg[0]:match("@?(.*/)") or arg[0]:match("@?(.*\\)") or ""
  return dofile(script_directory .. "utility-functions.lua")
end)
if not success then
  -- on failure the second pcall result is the error value, not the module
  print("\n\n" .. tostring(utility))
  error("\n\nThis script may be installed improperly. Follow instructions at:\n\thttps://github.com/TangentFoxy/.lua-files#installation\n")
end

print(utility.path)
print(utility.OS)
print(utility.uuid())

print("\n")

-- utility.ls returns a function; passing print to it prints each listed item
utility.ls(".")(print)
|
14
video-dl.lua
14
video-dl.lua
@ -16,8 +16,6 @@ local help = [[Usage:
|
|||||||
on each.
|
on each.
|
||||||
<url>: Source. YouTube URL expected, but should work with anything
|
<url>: Source. YouTube URL expected, but should work with anything
|
||||||
yt-dlp works with.
|
yt-dlp works with.
|
||||||
|
|
||||||
IMPORTANT: Expects Firefox to be installed with a YouTube account logged in to get cookies from.
|
|
||||||
]]
|
]]
|
||||||
|
|
||||||
if os.execute("where yt-dlp") ~= 0 then
|
if os.execute("where yt-dlp") ~= 0 then
|
||||||
@ -39,18 +37,22 @@ else
|
|||||||
-- "--file" is handled just before execution
|
-- "--file" is handled just before execution
|
||||||
end
|
end
|
||||||
|
|
||||||
|
local core_command = "yt-dlp --retries 100 "
|
||||||
|
local metadata_options = "--write-sub --write-auto-sub --sub-lang \"en.*\" --write-thumbnail --write-description "
|
||||||
|
local quality_ceiling_720 = "-f \"bestvideo[height<=720]+bestaudio/best[height<=720]\" "
|
||||||
|
|
||||||
local execute = {
|
local execute = {
|
||||||
backup = function(url)
|
backup = function(url)
|
||||||
os.execute("yt-dlp --retries 100 --write-sub --write-auto-sub --sub-lang \"en.*\" --write-thumbnail --write-description -f \"bestvideo[height<=720]+bestaudio/best[height<=720]\" \"" .. url .."\" --cookies-from-browser \"firefox\"")
|
os.execute(core_command .. metadata_options .. quality_ceiling_720 .. url:enquote())
|
||||||
end,
|
end,
|
||||||
music = function(url)
|
music = function(url)
|
||||||
os.execute("yt-dlp --retries 100 -x --audio-quality 0 \"" .. url .."\" --cookies-from-browser \"firefox\"")
|
os.execute(core_command .. "-x --audio-quality 0 " .. url:enquote())
|
||||||
end,
|
end,
|
||||||
metadata = function(url)
|
metadata = function(url)
|
||||||
os.execute("yt-dlp --retries 100 --write-sub --write-auto-sub --sub-lang \"en.*\" --write-thumbnail --write-description --skip-download \"" .. url .."\" --cookies-from-browser \"firefox\"")
|
os.execute(core_command .. metadata_options .. "--skip-download " .. url:enquote())
|
||||||
end,
|
end,
|
||||||
video = function(url)
|
video = function(url)
|
||||||
os.execute("yt-dlp --retries 100 -f \"bestvideo[height<=720]+bestaudio/best[height<=720]\" \"" .. url .. "\" --cookies-from-browser \"firefox\"")
|
os.execute(core_command .. quality_ceiling_720 .. url:enquote())
|
||||||
end,
|
end,
|
||||||
}
|
}
|
||||||
execute.clone = execute.backup
|
execute.clone = execute.backup
|
||||||
|
Loading…
Reference in New Issue
Block a user