Organised and extended attr tests

This commit is contained in:
Wouter Scherphof 2013-04-05 19:58:39 +02:00
parent 10a5faf192
commit 2983056fa2
2 changed files with 90 additions and 35 deletions

View File

@ -68,10 +68,12 @@ Selectors can be combined; e.g. `".class:not([attribute]) element.class"`
###Limitations ###Limitations
- Attribute values in selectors currently cannot contain any spaces, since space is interpreted as a delimiter between the `ancestor` and `descendant`, `parent` and `>`, or `>` and `child` parts of the selector - Attribute values in selectors currently cannot contain any spaces, since space is interpreted as a delimiter between the `ancestor` and `descendant`, `parent` and `>`, or `>` and `child` parts of the selector
- Likewise, for the `parent > child` relation, the spaces before and after the `>` are mandatory - Consequently, for the `parent > child` relation, the spaces before and after the `>` are mandatory
- Attribute values in selectors currently also cannot contain any of `#`, `.`, `[`, `]`, `:`, `(`, or `)`
- `<!` elements are not parsed, including doctype, comments, and CDATA - `<!` elements are not parsed, including doctype, comments, and CDATA
- Textnodes are not separate entries in the tree, so the content of `<p>line1<br />line2</p>` is plainly `"line1<br />line2"` - Textnodes are not separate entries in the tree, so the content of `<p>line1<br />line2</p>` is plainly `"line1<br />line2"`
- All start and end tags should be explicitly specified in the text to be parsed; omitted tags (as [permitted](http://www.w3.org/TR/html5/syntax.html#optional-tags) by the HTML spec) are NOT implied. Only the [void](http://www.w3.org/TR/html5/syntax.html#void-elements) elements naturally don't need an end tag - All start and end tags should be explicitly specified in the text to be parsed; omitted tags (as [permitted](http://www.w3.org/TR/html5/syntax.html#optional-tags) by the HTML spec) are NOT implied. Only the [void](http://www.w3.org/TR/html5/syntax.html#void-elements) elements naturally don't need (and mustn't have) an end tag
- The HTML text is not validated in any way; tag and attribute names and the nesting of different tags are completely arbitrary. The only HTML-specific part of the parser is that it knows which tags are void elements
##Examples ##Examples
See `./doc/samples.lua` See `./doc/samples.lua`

View File

@ -27,9 +27,6 @@ function test_void()
else else
assert_equal("br", n.name, "name") assert_equal("br", n.name, "name")
assert_equal("", n:getcontent(), "content") assert_equal("", n:getcontent(), "content")
for _ in pairs(n.attributes) do
fail("should not have attributes")
end
end end
end end
end end
@ -37,18 +34,12 @@ end
function test_attr() function test_attr()
local tree = htmlparser.parse([[ local tree = htmlparser.parse([[
<n a1 a2= a3='' a4="" <n a1 a2= a3='' a4=""
a5='a"5"' a6="a'6'" a7='a 7' a8='a=8' a5='a"5"' a6="a'6'" a7='#.[] :()' a8='|*+-=?$^%&/'
a9='en-gb' a10='enen' a9=a9
a11='one two three' a10></n>
></n>
<m a9="en-us" a10></m>
<l a9="enen" a11="three four five"></l>
]]) ]])
assert_equal(3, #tree.nodes, "top level") assert_equal(1, #tree.nodes, "top level")
local n local n = tree.nodes[1]
for _,v in ipairs(tree.nodes) do
if v.name == "n" then n = v break end
end
assert(tree("[a1]")[n], "a1") assert(tree("[a1]")[n], "a1")
assert(tree("[a2]")[n], "a2") assert(tree("[a2]")[n], "a2")
assert(tree("[a3]")[n], "a3") assert(tree("[a3]")[n], "a3")
@ -57,27 +48,91 @@ function test_attr()
assert(tree("[a6]")[n], "a6") assert(tree("[a6]")[n], "a6")
assert(tree("[a7]")[n], "a7") assert(tree("[a7]")[n], "a7")
assert(tree("[a8]")[n], "a8") assert(tree("[a8]")[n], "a8")
assert(tree("[a9]")[n], "a9")
assert(tree("[a10]")[n], "a10")
end
function test_attr_equal()
local tree = htmlparser.parse([[
<n a1 a2= a3='' a4=""
a5='a"5"' a6="a'6'" a7='#.[] :()' a8='|*+-=?$^%&/'
a9=a9
a10></n>
]])
assert_equal(1, #tree.nodes, "top level")
local n = tree.nodes[1]
assert(tree("[a1='']")[n], "a1=''") assert(tree("[a1='']")[n], "a1=''")
assert(tree("[a2='']")[n], "a2=''") assert(tree("[a2='']")[n], "a2=''")
assert(tree("[a3='']")[n], "a3=''") assert(tree("[a3='']")[n], "a3=''")
assert(tree("[a4='']")[n], "a4=''") assert(tree("[a4='']")[n], "a4=''")
assert(tree("[a5='a\"5\"']")[n], "a5='a\"5\"'") assert(tree("[a5='a\"5\"']")[n], "a5='a\"5\"'")
assert(tree("[a6=\"a'6'\"]")[n], "a6=\"a'6'\"") assert(tree("[a6=\"a'6'\"]")[n], "a6=\"a'6'\"")
assert(tree("[a8='a=8']")[n], "a8='a=8'") -- not these characters
assert_equal(1, tree("[a10=]"):len(), "a10=") -- (because these have a special meaning as id, class, or attribute selector, hierarchy separator, or filter command)
assert_equal(1, tree("[a10='']"):len(), "a10=''") -- they can occur in the HTML, but not in a selector string
assert_equal(2, tree("[a10!='enen']"):len(), "a10!='enen'") -- assert(tree("[a7='#.[] :()']")[n], "a7='#.[] :()'")
assert_equal(2, tree("[a10!='']"):len(), "a10!=''") assert(tree("[a8='|*+-=?$^%&/']")[n], "a8='|*+-=?$^%&/'")
assert_equal(3, tree("[a0!='']"):len(), "a0!=''") assert(tree("[a9='a9']")[n], "a9='a9'")
assert_equal(0, tree("[a0='']"):len(), "a0=''") assert(tree("[a10='']")[n], "a10=''")
assert_equal(2, tree("[a9|='en']"):len(), "a9|='en'") assert(tree("[a10=]")[n], "a10=")
assert_equal(3, tree("[a9^='en']"):len(), "a9^='en'") end
assert_equal(1, tree("[a9$='en']"):len(), "a9$='en'")
assert_equal(1, tree("[a11~='two']"):len(), "a1~='two'") function test_attr_notequal()
assert_equal(2, tree("[a11~='three']"):len(), "a1~='three'") local tree = htmlparser.parse([[
assert_equal(1, tree("[a11~='four']"):len(), "a1~='four'") <n a1="a1"></n>
assert_equal(1, tree("[a7*='7']"):len(), "a7*='7'") <n a1="a2"></n>
assert_equal(1, tree("[a11*='f']"):len(), "a11*='f'") <n a1></n>
<n></n>
]])
assert_equal(4, #tree.nodes, "top level")
assert_equal(3, tree("[a1!='a1']"):len(), "a1!='a1'")
assert_equal(4, tree("[a1!='b1']"):len(), "a1!='b1'")
assert_equal(3, tree("[a1!='']"):len(), "a1!=''")
assert_equal(3, tree("[a1!=]"):len(), "a1!=")
end
function test_attr_prefix_start_end()
local tree = htmlparser.parse([[
<n a1="en-gb"></n>
<n a1="en-us"></n>
<n a1="en"></n>
<n a1="enen"></n>
<n></n>
]])
assert_equal(5, #tree.nodes, "top level")
assert_equal(3, tree("[a1|='en']"):len(), "a1|='en'")
assert_equal(4, tree("[a1^='en']"):len(), "a1^='en'")
assert_equal(2, tree("[a1$='en']"):len(), "a1$='en'")
end
function test_attr_word()
local tree = htmlparser.parse([[
<n a1="one two three"></n>
<n a1="three four five"></n>
<n a1></n>
<n></n>
]])
assert_equal(4, #tree.nodes, "top level")
assert_equal(1, tree("[a1~='two']"):len(), "a1~='two'")
assert_equal(2, tree("[a1~='three']"):len(), "a1~='three'")
assert_equal(1, tree("[a1~='four']"):len(), "a1~='four'")
end
function test_attr_contains()
local tree = htmlparser.parse([[
<n a1="one"></n>
<n a1="one two three"></n>
<n a1="three four five"></n>
<n a1=""></n>
<n a1></n>
<n></n>
]])
assert_equal(6, #tree.nodes, "top level")
assert_equal(2, tree("[a1*='one']"):len(), "a1*='one'")
assert_equal(2, tree("[a1*='t']"):len(), "a1*='t'")
assert_equal(1, tree("[a1*='f']"):len(), "a1*='f'")
assert_equal(5, tree("[a1*='']"):len(), "a1*=''")
assert_equal(5, tree("[a1*=]"):len(), "a1*=")
end end
function test_descendants() function test_descendants()
@ -104,8 +159,7 @@ function test_descendants()
<child>not</child> <child>not</child>
</arbitrary> </arbitrary>
]]) ]])
local sel = tree("parent child") assert_equal(8, tree("parent child"):len(), 'parent child')
assert_equal(8, sel:len(), 'parent child')
end end
function test_children() function test_children()
@ -132,6 +186,5 @@ function test_children()
<child>not</child> <child>not</child>
</arbitrary> </arbitrary>
]]) ]])
local sel = tree("parent > child") assert_equal(4, tree("parent > child"):len(), 'parent > child')
assert_equal(4, sel:len(), 'parent > child')
end end