added test_attr (and fixed one or two edge cases)

This commit is contained in:
Wouter Scherphof 2013-04-03 22:32:23 +02:00
parent b3bbb56d9f
commit 10a5faf192
3 changed files with 65 additions and 19 deletions

View File

@ -23,21 +23,19 @@ local function parse(text)
local start, k, eq, quote, v local start, k, eq, quote, v
start, apos, k, eq, quote = string.find(tagst, start, apos, k, eq, quote = string.find(tagst,
"%s+" .. -- some uncaptured space "%s+" .. -- some uncaptured space
"([^%s=]+)" .. -- k = an unspaced string up to an optional "=" "([^%s=/>]+)" .. -- k = an unspaced string up to an optional "=" or the "/" or ">"
"(=?)" .. -- eq = the optional; "=", else "" "(=?)" .. -- eq = the optional; "=", else ""
"(['\"]?)", -- quote = an optional "'" or '"' following the "=", or "" "(['\"]?)", -- quote = an optional "'" or '"' following the "=", or ""
apos) apos)
if not k or k == "/>" or k == ">" then break end if not k or k == "/>" or k == ">" then break end
if eq == "" then if eq == "=" then
v = ""
else
local pattern = "=([^%s>]*)" local pattern = "=([^%s>]*)"
if quote ~= '' then if quote ~= "" then
pattern = quote .. "([^" .. quote .. "]*)" .. quote pattern = quote .. "([^" .. quote .. "]*)" .. quote
end end
start, apos, v = string.find(tagst, pattern, apos) start, apos, v = string.find(tagst, pattern, apos)
end end
tag:addattribute(k, v) tag:addattribute(k, v or "")
end end
if voidelements[string.lower(tag.name)] then if voidelements[string.lower(tag.name)] then

View File

@ -51,9 +51,8 @@ function ElementNode:addattribute(k, v)
self.attributes[k] = v self.attributes[k] = v
if string.lower(k) == "id" then if string.lower(k) == "id" then
self.id = v self.id = v
end
-- class attribute contains "space-separated tokens", each of which we'd like quick access to -- class attribute contains "space-separated tokens", each of which we'd like quick access to
if string.lower(k) == "class" then elseif string.lower(k) == "class" then
for class in string.gmatch(v, "%S+") do for class in string.gmatch(v, "%S+") do
table.insert(self.classes, class) table.insert(self.classes, class)
end end
@ -98,18 +97,20 @@ local function select(self, s)
local sets = {[""] = self.deeperelements, ["["] = self.deeperattributes, local sets = {[""] = self.deeperelements, ["["] = self.deeperattributes,
["#"] = self.deeperids, ["."] = self.deeperclasses} ["#"] = self.deeperids, ["."] = self.deeperclasses}
local function match(t, w) local function match(t, w)
local m, v local m, e, v
if t == "[" then w, m, v = string.match(w, if t == "[" then w, m, e, v = string.match(w,
"([^=|%*~%$!%^]+)" .. -- w = 1 or more characters up to a possible "=", "|", "*", "~", "$", "!", or "^" "([^=|%*~%$!%^]+)" .. -- w = 1 or more characters up to a possible "=", "|", "*", "~", "$", "!", or "^"
"([|%*~%$!%^]?)" .. -- m = an optional "|", "*", "~", "$", "!", or "^", preceding the optional "=" "([|%*~%$!%^]?)" .. -- m = an optional "|", "*", "~", "$", "!", or "^", preceding the optional "="
"=?" .. -- an optional uncaptured "=" "(=?)" .. -- e = the optional "="
"(.*)" -- v = anything following the "=", or else "" "(.*)" -- v = anything following the "=", or else ""
) )
end end
local matched = Set:new(sets[t][w]) local matched = Set:new(sets[t][w])
-- attribute value selectors -- attribute value selectors
if v and v ~= "" then if e == "=" then
if #v < 2 then v = "'" .. v .. "'" end -- values should be quoted
v = string.sub(v, 2, #v - 1) -- strip quotes v = string.sub(v, 2, #v - 1) -- strip quotes
if m == "!" then matched = Set:new(self.deepernodes) end -- include those without that attribute
for node in pairs(matched) do for node in pairs(matched) do
local a = node.attributes[w] local a = node.attributes[w]
-- equals -- equals

View File

@ -6,10 +6,9 @@ local lunitx = require("lunitx")
module("html", lunitx.testcase, package.seeall) module("html", lunitx.testcase, package.seeall)
local htmlparser = require("htmlparser") local htmlparser = require("htmlparser")
local tree, sel
function test_void() function test_void()
tree = htmlparser.parse([[ local tree = htmlparser.parse([[
<p> <p>
<br> <br>
<br/> <br/>
@ -27,14 +26,62 @@ function test_void()
assert_equal(4, #n.nodes, "deeper level") assert_equal(4, #n.nodes, "deeper level")
else else
assert_equal("br", n.name, "name") assert_equal("br", n.name, "name")
assert_equal(0, #n.attributes, "attributes")
assert_equal("", n:getcontent(), "content") assert_equal("", n:getcontent(), "content")
for _ in pairs(n.attributes) do
fail("should not have attributes")
end
end end
end end
end end
-- Exercises attribute parsing and the attribute selectors of the tree
-- returned by htmlparser.parse.
--
-- The fixture holds three top-level elements (<n>, <m>, <l>) whose
-- attributes cover: no value (a1), "=" with nothing after it (a2), empty
-- quoted values (a3, a4), quotes nested inside the other quote kind (a5, a6),
-- embedded whitespace (a7) and an embedded "=" (a8). a9, a10 and a11 are
-- shared between elements so that the operator selectors can be checked by
-- counting matches.
--
-- Every failure label is derived from the selector under test, so the label
-- always names the selector that actually failed.
function test_attr()
  local tree = htmlparser.parse([[
<n a1 a2= a3='' a4=""
a5='a"5"' a6="a'6'" a7='a 7' a8='a=8'
a9='en-gb' a10='enen'
a11='one two three'
></n>
<m a9="en-us" a10></m>
<l a9="enen" a11="three four five"></l>
]])
  assert_equal(3, #tree.nodes, "top level")

  -- Locate the <n> element; the membership checks below are made against it.
  local n
  for _, v in ipairs(tree.nodes) do
    if v.name == "n" then n = v break end
  end
  -- Fail here with a clear message instead of letting a nil node make every
  -- membership assertion below fail with an unrelated label.
  assert(n, "element <n> not found at top level")

  -- Failure label for a selector: the selector text without its enclosing
  -- square brackets, e.g. "[a9|='en']" -> "a9|='en'".
  local function label(selector)
    return string.sub(selector, 2, -2)
  end

  -- Selectors whose result must contain the <n> element.
  local contains_n = {
    -- attribute presence
    "[a1]", "[a2]", "[a3]", "[a4]", "[a5]", "[a6]", "[a7]", "[a8]",
    -- attributes without a value, or with an empty one, compare equal to ''
    "[a1='']", "[a2='']", "[a3='']", "[a4='']",
    -- values containing the other quote character, or an "="
    "[a5='a\"5\"']",
    "[a6=\"a'6'\"]",
    "[a8='a=8']",
  }
  for _, selector in ipairs(contains_n) do
    assert(tree(selector)[n], label(selector))
  end

  -- Selectors checked by the number of elements they select:
  -- { expected count, selector }.
  local expected_counts = {
    {1, "[a10=]"},
    {1, "[a10='']"},
    -- "!=" is expected to also select elements lacking the attribute
    {2, "[a10!='enen']"},
    {2, "[a10!='']"},
    {3, "[a0!='']"},
    {0, "[a0='']"},
    {2, "[a9|='en']"},
    {3, "[a9^='en']"},
    {1, "[a9$='en']"},
    {1, "[a11~='two']"},
    {2, "[a11~='three']"},
    {1, "[a11~='four']"},
    {1, "[a7*='7']"},
    {1, "[a11*='f']"},
  }
  for _, case in ipairs(expected_counts) do
    local expected, selector = case[1], case[2]
    assert_equal(expected, tree(selector):len(), label(selector))
  end
end
function test_descendants() function test_descendants()
tree = htmlparser.parse([[ local tree = htmlparser.parse([[
<parent>1 <parent>1
<child>1</child> <child>1</child>
<child>2 <child>2
@ -57,12 +104,12 @@ function test_descendants()
<child>not</child> <child>not</child>
</arbitrary> </arbitrary>
]]) ]])
sel = tree("parent child") local sel = tree("parent child")
assert_equal(8, sel:len(), 'parent child') assert_equal(8, sel:len(), 'parent child')
end end
function test_children() function test_children()
tree = htmlparser.parse([[ local tree = htmlparser.parse([[
<parent>1 <parent>1
<child>1</child> <child>1</child>
<child>2 <child>2
@ -85,6 +132,6 @@ function test_children()
<child>not</child> <child>not</child>
</arbitrary> </arbitrary>
]]) ]])
sel = tree("parent > child") local sel = tree("parent > child")
assert_equal(4, sel:len(), 'parent > child') assert_equal(4, sel:len(), 'parent > child')
end end