added ability to extract titles

This commit is contained in:
Tangent / Rose / Nebula Rosa 2024-11-09 00:42:41 -07:00
parent f593a3c05b
commit 33f9a123e6
3 changed files with 15 additions and 44 deletions

View File

@ -50,7 +50,8 @@ The following is shared:
- `keywords`: Array of Strings: Keywords/Tags. (I'm not sure what the difference is in the final output so it goes in both.)
- `sections`: **See "Book"/"Anthology" variations.** (I call LitErotica's stories sections - because they are often part of a larger whole.)
- `section_titles`: (Optional) Array of Strings: The titles to be used for Table of Contents / headings. (If `sections.naming` is specified, `section_titles` will be ignored.)
- `lazy_titling`: (Optional) Boolean: URLs will be used to generate section titles. (Warning: This process is likely to create janky titles.)
- `extract_titles`: (Optional) Boolean: Titles will be extracted from the first page of every section. (Note: This is compatible with `sections.automatic_naming`, but it can create repeated titles.)
- `lazy_titling`: (Optional) Boolean: URLs will be used to generate section titles. (Warning: This process is likely to create janky titles. Note: This is compatible with `sections.automatic_naming`, but it can create repeated titles.)
- `page_counts`: Array of Integers: The number of pages on LitErotica per "story". (I call them sections because this script was made to put together story series originally.)
#### Variation: Book

View File

@ -156,6 +156,10 @@ local function download_pages(config)
-- Select the page-body element(s) with a fixed CSS selector and keep the content of the first match.
-- NOTE(review): content_tag[1] is indexed without a nil check; if the selector matches nothing this would raise — confirm select's empty-result behavior.
local content_tag = parser:select(".article > div > div") -- TODO add ability to set selector in config!
local text = content_tag[1]:getcontent()
-- With config.extract_titles set, page 1 gets the text of the first ".headline" match prepended to its content.
-- NOTE(review): the headline text is concatenated directly, with no separator or heading markup, and [1] is again unchecked.
if page == 1 and config.extract_titles then
text = parser:select(".headline")[1]:gettext() .. text
end
-- Write the (possibly title-prefixed) text plus a trailing newline to "<section_dir><page>.html",
-- using utility.open in "w" mode with a callback that receives the file handle.
utility.open(section_dir .. page .. ".html", "w")(function(page_file)
page_file:write(text .. "\n")
end)

View File

@ -8,48 +8,14 @@ if not success then
error("\n\nThis script may be installed improperly. Follow instructions at:\n\thttps://github.com/TangentFoxy/.lua-files#installation\n")
end
print("---")
-- Scratch/test script body.
-- NOTE(review): this span reads like two revisions shown together (an LLM command runner and an
-- HTML title-extraction test) — confirm which lines belong to the current version before relying on it.
local htmlparser = utility.require("htmlparser")
-- Open TEST.html for reading; everything up to the final `end)` runs inside the callback given the file handle.
utility.open("TEST.html", "r")(function(html_file)
local raw_html = html_file:read("*all")
-- Shell command lines that get written into a temporary PowerShell script below.
local commands = {
"llm run dolphin-mixtral \"How are you?\"",
"llm run curt \"How are you?\"",
"llm run curt2 \"How are you?\"",
"llm run synx \"How are you?\"",
"llm run synx \"Describe actions you would take as a synx.\"",
"llm run synx \"Describe a synx.\"",
"llm run synx \"What are you?\""
}
-- Parse the HTML and take the content of the first element matching the body selector.
-- NOTE(review): `text` is assigned here but not read again within this span.
local parser = htmlparser.parse(raw_html)
local content_tag = parser:select(".article > div > div") -- TODO add ability to set selector in config!
local text = content_tag[1]:getcontent()
-- local llm = loadfile(utility.path .. "llm.lua")
-- Append every command five times to .run-this-shit.ps1 via the shell's echo and >> redirection.
for _, command in ipairs(commands) do
-- print("\n\n\nTEST START", command .. "\n\n\n")
-- print(command:rep(5, "\n"))
for i = 1, 5 do
-- os.execute(command)
-- loadfile(utility.path .. "llm.lua")(command:sub(5))
-- command = command:sub(5)
-- local tab = {}
-- for argument in command:gmatch("%S+") do
-- table.insert(tab, argument)
-- end
-- llm(unpack(tab))
-- print("\nOUTPUT ENDS\n")
-- error("\n\ntmp break\n\n")
-- print(command)
os.execute("echo " .. command .. " >> .run-this-shit.ps1")
end
end
-- os.execute("echo " .. commands[1] .. " >> .run-this-shit.ps1")
-- Run the generated script with pwsh, then delete it with rm.
os.execute("pwsh .run-this-shit.ps1")
os.execute("rm .run-this-shit.ps1")
print("---")
-- Print the text of the first ".headline" element from the parsed HTML.
-- NOTE(review): title_tag[1] is indexed without checking that the selector matched anything.
local title_tag = parser:select(".headline")
print(title_tag[1]:gettext())
end)