From 33f9a123e6cabf8645c01d7c416ae6f5df2977af Mon Sep 17 00:00:00 2001 From: Tangent Date: Sat, 9 Nov 2024 00:42:41 -0700 Subject: [PATCH] added ability to extract titles --- README.md | 3 ++- make-epub.lua | 4 ++++ test.lua | 52 +++++++++------------------------------------------ 3 files changed, 15 insertions(+), 44 deletions(-) diff --git a/README.md b/README.md index e4e740d..6eacabe 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,8 @@ The following is shared: - `keywords`: Array of Strings: Keywords/Tags. (I'm not sure what the difference is in the final output so it goes in both.) - `sections`: **See "Book"/"Anthology" variations.** (I call LitErotica's stories sections - because they are often part of a larger whole.) - `section_titles`: (Optional) Array of Strings: The titles to be used for Table of Contents / headings. (If `sections.naming` is specified, `section_titles` will be ignored.) -- `lazy_titling`: (Optional) Boolean: URLs will be used to generate section titles. (Warning: This process is likely to create janky titles.) +- `extract_titles`: (Optional) Boolean: Titles will be extracted from the first page of every section. (Note: This is compatible with `sections.automatic_naming`, but it can create repeated titles.) +- `lazy_titling`: (Optional) Boolean: URLs will be used to generate section titles. (Warning: This process is likely to create janky titles. Note: This is compatible with `sections.automatic_naming`, but it can create repeated titles.) - `page_counts`: Array of Integers: The number of pages on LitErotica per "story". (I call them sections because this script was made to put together story series originally.) #### Variation: Book diff --git a/make-epub.lua b/make-epub.lua index 1332820..a21485c 100755 --- a/make-epub.lua +++ b/make-epub.lua @@ -156,6 +156,10 @@ local function download_pages(config) local content_tag = parser:select(".article > div > div") -- TODO add ability to set selector in config! local text = content_tag[1]:getcontent() + if page == 1 and config.extract_titles then + text = parser:select(".headline")[1]:gettext() .. text + end + utility.open(section_dir .. page .. ".html", "w")(function(page_file) page_file:write(text .. "\n") end) diff --git a/test.lua b/test.lua index d4ca384..55138eb 100644 --- a/test.lua +++ b/test.lua @@ -8,48 +8,14 @@ if not success then error("\n\nThis script may be installed improperly. Follow instructions at:\n\thttps://github.com/TangentFoxy/.lua-files#installation\n") end -print("---") +local htmlparser = utility.require("htmlparser") +utility.open("TEST.html", "r")(function(html_file) + local raw_html = html_file:read("*all") -local commands = { - "llm run dolphin-mixtral \"How are you?\"", - "llm run curt \"How are you?\"", - "llm run curt2 \"How are you?\"", - "llm run synx \"How are you?\"", - "llm run synx \"Describe actions you would take as a synx.\"", - "llm run synx \"Describe a synx.\"", - "llm run synx \"What are you?\"" -} + local parser = htmlparser.parse(raw_html) + local content_tag = parser:select(".article > div > div") -- TODO add ability to set selector in config! + local text = content_tag[1]:getcontent() --- local llm = loadfile(utility.path .. "llm.lua") - -for _, command in ipairs(commands) do - -- print("\n\n\nTEST START", command .. "\n\n\n") - - -- print(command:rep(5, "\n")) - - for i = 1, 5 do - -- os.execute(command) - -- loadfile(utility.path .. "llm.lua")(command:sub(5)) - - -- command = command:sub(5) - -- local tab = {} - -- for argument in command:gmatch("%S+") do - -- table.insert(tab, argument) - -- end - -- llm(unpack(tab)) - - -- print("\nOUTPUT ENDS\n") - - -- error("\n\ntmp break\n\n") - - - -- print(command) - os.execute("echo " .. command .. " >> .run-this-shit.ps1") - end -end - --- os.execute("echo " .. commands[1] .. " >> .run-this-shit.ps1") -os.execute("pwsh .run-this-shit.ps1") -os.execute("rm .run-this-shit.ps1") - -print("---") + local title_tag = parser:select(".headline") + print(title_tag[1]:gettext()) +end)