:not(), [att=val], [att!=val], [att|=val], [att*=val], [att~=val],
[att^=val], [att$=val]
Note that the selection is now returned as a simple Set, breaking the
ability brought in by #8 and #9 to call :select() or () on the selection. Of
course, the elements in the returned Set are still ElementNodes that
can be selected upon.
This commit is contained in:
Wouter Scherphof 2013-03-26 09:57:00 +01:00
parent 64f3eb4df3
commit 206f7af3c4
3 changed files with 60 additions and 26 deletions

View File

@ -87,33 +87,57 @@ function ElementNode:close(closestart, closeend)
end end
end end
--- Escape every Lua pattern magic character in a string.
-- The result can be embedded in a pattern (e.g. for string.match) so that
-- the text of `s` is matched literally instead of being interpreted.
-- @param s the string to escape
-- @return a copy of `s` in which each of ^ $ ( ) % . [ ] * + - ? is
--         prefixed with "%"
local function escape(s)
  -- A single gsub pass replaces the per-character concatenation loop, which
  -- was quadratic in the length of `s` and rebuilt a lookup table per call.
  -- In the replacement, "%%" is a literal "%" and "%0" is the matched char.
  -- The outer parentheses discard gsub's second result (the match count).
  return (string.gsub(s, "[%^%$%(%)%%%.%[%]%*%+%-%?]", "%%%0"))
end
local function select(self, s) local function select(self, s)
if not s or type(s) ~= "string" or s == "" then return Set:new() end if not s or type(s) ~= "string" or s == "" then return Set:new() end
local sets = {[""] = self.deeperelements, ["["] = self.deeperattributes,
["#"] = self.deeperids, ["."] = self.deeperclasses}
local function match(t, w) local function match(t, w)
local sets = { local m, v
[""] = self.deeperelements, if t == "[" then w, m, v = string.match(w,
["["] = self.deeperattributes, "([^=|%*~%$!%^]+)" .. -- w = 1 or more characters up to a possible "=", "|", "*", "~", "$", "!", or "^"
["#"] = self.deeperids, "([|%*~%$!%^]?)" .. -- m = an optional "|", "*", "~", "$", "!", or "^", preceding the optional "="
["."] = self.deeperclasses "=?" .. -- an optional uncaptured "="
} "(.*)" -- v = anything following the "=", or else ""
local v
if t == "[" then
w, v = string.match(w,
"([^=]+)" .. -- w = 1 or more characters up to a possible "="
"=?" .. -- an optional uncaptured "="
"(.*)" -- v = anything following the "=", or else ""
) )
end end
local matched = sets[t][w] local matched = Set:new(sets[t][w])
-- attribute value selectors
if v and v ~= "" then if v and v ~= "" then
v = string.sub(v, 2, #v - 1) -- strip quotes v = string.sub(v, 2, #v - 1) -- strip quotes
for node in pairs(matched) do for node in pairs(matched) do
if node.attributes[w] ~= v then local a = node.attributes[w]
matched:remove(node) -- equals
if m == "" and a ~= v then matched:remove(node)
-- not equals
elseif m == "!" and a == v then matched:remove(node)
-- prefix
elseif m =="|" and string.match(a, "^[^-]*") ~= v then matched:remove(node)
-- contains
elseif m =="*" and string.match(a, escape(v)) ~= v then matched:remove(node)
-- word
elseif m =="~" then matched:remove(node)
for word in string.gmatch(a, "%S+") do
if word == v then matched:add(node) break end
end
-- starts with
elseif m =="^" and string.match(a, "^" .. escape(v)) ~= v then matched:remove(node)
-- ends with
elseif m =="$" and string.match(a, escape(v) .. "$") ~= v then matched:remove(node)
end end
end end -- for node
end end -- if v
return matched return matched
end end
@ -123,8 +147,7 @@ local function select(self, s)
resultset = Set:new() resultset = Set:new()
for subject in pairs(subjects) do for subject in pairs(subjects) do
local star = subject.deepernodes local star = subject.deepernodes
if childrenonly then star = Set:new(subject.nodes) end if childrenonly then star = Set:new(subject.nodes) childrenonly = false end
childrenonly = false
resultset = resultset + star resultset = resultset + star
end end
if part == "*" then goto nextpart end if part == "*" then goto nextpart end

View File

@ -1,11 +1,11 @@
<!DOCTYPE html> <!DOCTYPE html>
<html lang="en" test1='val1' test2="val='2'" test3='val="3"' test4="val = 4" test5=val5 test6=val""6> <html lang="en" test1='val1' test2="val='2'" test3='val="3"' test4="val = 4" test5=val5 test6=val""6>
<head> <head words="testing one two three">
<meta charset="utf-8" /> <meta charset="utf-8" />
<link rel="stylesheet" href="test.css" /> <link rel="stylesheet" href="test.css" hreflang="en" />
<link rel="alternate" title="Feed" type="application/atom+xml" href="#" /> <link rel="alternate" title="Feed" type="application/atom+xml" href="#" hreflang="en-gb" />
</head> </head>
<body> <body words="testing three four five">
<h1>Contents</h1> <h1>Contents</h1>
<ol class="chapters"> <ol class="chapters">
<li>Preface</li> <li>Preface</li>
@ -27,13 +27,13 @@
</p> </p>
<ul class="contacts"> <ul class="contacts">
<li id="/contacts/4711"> <li id="/contacts/4711">
<a href="/contacts/4711"> <a href="/contacts/4711" hreflang="en-us">
<span class="firstname">Jon</span> <span class="firstname">Jon</span>
<span class="lastname">Moore</span> <span class="lastname">Moore</span>
</a> </a>
</li> </li>
<li id="/contacts/4712"> <li id="/contacts/4712">
<a href="/contacts/4712"> <a href="/contacts/4712" hreflang="english">
<span class="firstname">Homer</span> <span class="firstname">Homer</span>
<span class="lastname">Simpson</span> <span class="lastname">Simpson</span>
</a> </a>

View File

@ -60,6 +60,17 @@ select("[itemscope='']")
select("[itemscope=]") select("[itemscope=]")
select("[itemscope]") select("[itemscope]")
select("[itemscope][itemprop='address']")
select("[itemscope][itemprop!='address']")
select("[itemscope][itemprop!='adres']")
select("[itemscope][itemprop!='']")
select("[hreflang|='en']")
select("[itemprop*='address']")
select("[words~='two']")
select("[words~='three']")
select("[itemprop$='ion']")
select("[hreflang^='en']")
print("\nchapters") print("\nchapters")
local sel, chapters = root("ol.chapters > li"), {} local sel, chapters = root("ol.chapters > li"), {}
for e in pairs(sel) do for e in pairs(sel) do