mirror of
https://github.com/msva/lua-htmlparser.git
synced 2024-11-27 12:44:22 +00:00
closes #6
:not(), [att=val], [att!=val], [att|=val], [att*=val], [att~=val], [att^=val], [att$=val] Note that the selection is now returned as a simple Set, breaking the ability brought in by #8 and #9 to call :select() or () on the selection. Of course, the elements in the returned Set are still ElementNodes that can be selected upon.
This commit is contained in:
parent
64f3eb4df3
commit
206f7af3c4
@ -87,33 +87,57 @@ function ElementNode:close(closestart, closeend)
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
-- Escape every Lua pattern magic character in `s` so the result can be
-- embedded in a pattern and will match `s` literally.
--
-- The magic characters handled are: ^ $ ( ) % . [ ] * + - ?
-- Each one is prefixed with "%"; all other characters are copied unchanged.
--
-- s: the string to escape
-- returns: the escaped string (a single value)
local function escape(s)
  -- One gsub pass replaces the per-character loop that rebuilt the result
  -- with ".." on every iteration. "%%%0" is a literal "%" followed by the
  -- matched character. The outer parentheses discard gsub's second return
  -- value (the substitution count) so callers that concatenate or forward
  -- the result only ever see the string.
  return (string.gsub(s, "[%^%$%(%)%%%.%[%]%*%+%-%?]", "%%%0"))
end
|
||||||
|
|
||||||
local function select(self, s)
|
local function select(self, s)
|
||||||
if not s or type(s) ~= "string" or s == "" then return Set:new() end
|
if not s or type(s) ~= "string" or s == "" then return Set:new() end
|
||||||
|
local sets = {[""] = self.deeperelements, ["["] = self.deeperattributes,
|
||||||
|
["#"] = self.deeperids, ["."] = self.deeperclasses}
|
||||||
local function match(t, w)
|
local function match(t, w)
|
||||||
local sets = {
|
local m, v
|
||||||
[""] = self.deeperelements,
|
if t == "[" then w, m, v = string.match(w,
|
||||||
["["] = self.deeperattributes,
|
"([^=|%*~%$!%^]+)" .. -- w = 1 or more characters up to a possible "=", "|", "*", "~", "$", "!", or "^"
|
||||||
["#"] = self.deeperids,
|
"([|%*~%$!%^]?)" .. -- m = an optional "|", "*", "~", "$", "!", or "^", preceding the optional "="
|
||||||
["."] = self.deeperclasses
|
"=?" .. -- an optional uncaptured "="
|
||||||
}
|
"(.*)" -- v = anything following the "=", or else ""
|
||||||
local v
|
|
||||||
if t == "[" then
|
|
||||||
w, v = string.match(w,
|
|
||||||
"([^=]+)" .. -- w = 1 or more characters up to a possible "="
|
|
||||||
"=?" .. -- an optional uncaptured "="
|
|
||||||
"(.*)" -- v = anything following the "=", or else ""
|
|
||||||
)
|
)
|
||||||
end
|
end
|
||||||
local matched = sets[t][w]
|
local matched = Set:new(sets[t][w])
|
||||||
|
-- attribute value selectors
|
||||||
if v and v ~= "" then
|
if v and v ~= "" then
|
||||||
v = string.sub(v, 2, #v - 1) -- strip quotes
|
v = string.sub(v, 2, #v - 1) -- strip quotes
|
||||||
for node in pairs(matched) do
|
for node in pairs(matched) do
|
||||||
if node.attributes[w] ~= v then
|
local a = node.attributes[w]
|
||||||
matched:remove(node)
|
-- equals
|
||||||
|
if m == "" and a ~= v then matched:remove(node)
|
||||||
|
-- not equals
|
||||||
|
elseif m == "!" and a == v then matched:remove(node)
|
||||||
|
-- prefix
|
||||||
|
elseif m =="|" and string.match(a, "^[^-]*") ~= v then matched:remove(node)
|
||||||
|
-- contains
|
||||||
|
elseif m =="*" and string.match(a, escape(v)) ~= v then matched:remove(node)
|
||||||
|
-- word
|
||||||
|
elseif m =="~" then matched:remove(node)
|
||||||
|
for word in string.gmatch(a, "%S+") do
|
||||||
|
if word == v then matched:add(node) break end
|
||||||
|
end
|
||||||
|
-- starts with
|
||||||
|
elseif m =="^" and string.match(a, "^" .. escape(v)) ~= v then matched:remove(node)
|
||||||
|
-- ends with
|
||||||
|
elseif m =="$" and string.match(a, escape(v) .. "$") ~= v then matched:remove(node)
|
||||||
end
|
end
|
||||||
end
|
end -- for node
|
||||||
end
|
end -- if v
|
||||||
return matched
|
return matched
|
||||||
end
|
end
|
||||||
|
|
||||||
@ -123,8 +147,7 @@ local function select(self, s)
|
|||||||
resultset = Set:new()
|
resultset = Set:new()
|
||||||
for subject in pairs(subjects) do
|
for subject in pairs(subjects) do
|
||||||
local star = subject.deepernodes
|
local star = subject.deepernodes
|
||||||
if childrenonly then star = Set:new(subject.nodes) end
|
if childrenonly then star = Set:new(subject.nodes) childrenonly = false end
|
||||||
childrenonly = false
|
|
||||||
resultset = resultset + star
|
resultset = resultset + star
|
||||||
end
|
end
|
||||||
if part == "*" then goto nextpart end
|
if part == "*" then goto nextpart end
|
||||||
|
12
test.html
12
test.html
@ -1,11 +1,11 @@
|
|||||||
<!DOCTYPE html>
|
<!DOCTYPE html>
|
||||||
<html lang="en" test1='val1' test2="val='2'" test3='val="3"' test4="val = 4" test5=val5 test6=val""6>
|
<html lang="en" test1='val1' test2="val='2'" test3='val="3"' test4="val = 4" test5=val5 test6=val""6>
|
||||||
<head>
|
<head words="testing one two three">
|
||||||
<meta charset="utf-8" />
|
<meta charset="utf-8" />
|
||||||
<link rel="stylesheet" href="test.css" />
|
<link rel="stylesheet" href="test.css" hreflang="en" />
|
||||||
<link rel="alternate" title="Feed" type="application/atom+xml" href="#" />
|
<link rel="alternate" title="Feed" type="application/atom+xml" href="#" hreflang="en-gb" />
|
||||||
</head>
|
</head>
|
||||||
<body>
|
<body words="testing three four five">
|
||||||
<h1>Contents</h1>
|
<h1>Contents</h1>
|
||||||
<ol class="chapters">
|
<ol class="chapters">
|
||||||
<li>Preface</li>
|
<li>Preface</li>
|
||||||
@ -27,13 +27,13 @@
|
|||||||
</p>
|
</p>
|
||||||
<ul class="contacts">
|
<ul class="contacts">
|
||||||
<li id="/contacts/4711">
|
<li id="/contacts/4711">
|
||||||
<a href="/contacts/4711">
|
<a href="/contacts/4711" hreflang="en-us">
|
||||||
<span class="firstname">Jon</span>
|
<span class="firstname">Jon</span>
|
||||||
<span class="lastname">Moore</span>
|
<span class="lastname">Moore</span>
|
||||||
</a>
|
</a>
|
||||||
</li>
|
</li>
|
||||||
<li id="/contacts/4712">
|
<li id="/contacts/4712">
|
||||||
<a href="/contacts/4712">
|
<a href="/contacts/4712" hreflang="english">
|
||||||
<span class="firstname">Homer</span>
|
<span class="firstname">Homer</span>
|
||||||
<span class="lastname">Simpson</span>
|
<span class="lastname">Simpson</span>
|
||||||
</a>
|
</a>
|
||||||
|
11
test.lua
11
test.lua
@ -60,6 +60,17 @@ select("[itemscope='']")
|
|||||||
select("[itemscope=]")
|
select("[itemscope=]")
|
||||||
select("[itemscope]")
|
select("[itemscope]")
|
||||||
|
|
||||||
|
select("[itemscope][itemprop='address']")
|
||||||
|
select("[itemscope][itemprop!='address']")
|
||||||
|
select("[itemscope][itemprop!='adres']")
|
||||||
|
select("[itemscope][itemprop!='']")
|
||||||
|
select("[hreflang|='en']")
|
||||||
|
select("[itemprop*='address']")
|
||||||
|
select("[words~='two']")
|
||||||
|
select("[words~='three']")
|
||||||
|
select("[itemprop$='ion']")
|
||||||
|
select("[hreflang^='en']")
|
||||||
|
|
||||||
print("\nchapters")
|
print("\nchapters")
|
||||||
local sel, chapters = root("ol.chapters > li"), {}
|
local sel, chapters = root("ol.chapters > li"), {}
|
||||||
for e in pairs(sel) do
|
for e in pairs(sel) do
|
||||||
|
Loading…
Reference in New Issue
Block a user