From 2983056fa29c70ec9be113ab490b13d83be8006e Mon Sep 17 00:00:00 2001 From: Wouter Scherphof Date: Fri, 5 Apr 2013 19:58:39 +0200 Subject: [PATCH] Organised and extended attr tests --- README.md | 6 ++- tst/init.lua | 119 +++++++++++++++++++++++++++++++++++++-------------- 2 files changed, 90 insertions(+), 35 deletions(-) diff --git a/README.md b/README.md index f235aa5..da0887e 100644 --- a/README.md +++ b/README.md @@ -68,10 +68,12 @@ Selectors can be combined; e.g. `".class:not([attribute]) element.class"` ###Limitations - Attribute values in selectors currently cannot contain any spaces, since space is interpreted as a delimiter between the `ancestor` and `descendant`, `parent` and `>`, or `>` and `child` parts of the selector -- Likewise, for the `parent > child` relation, the spaces before and after the `>` are mandatory +- Consequently, for the `parent > child` relation, the spaces before and after the `>` are mandatory +- Attribute values in selectors currently also cannot contain any of `#`, `.`, `[`, `]`, `:`, `(`, or `)` - `line1
line2

` is plainly `"line1
line2"` -- All start and end tags should be explicitly specified in the text to be parsed; omitted tags (as [permitted](http://www.w3.org/TR/html5/syntax.html#optional-tags) by the the HTML spec) are NOT implied. Only the [void](http://www.w3.org/TR/html5/syntax.html#void-elements) elements naturally don't need an end tag +- All start and end tags should be explicitly specified in the text to be parsed; omitted tags (as [permitted](http://www.w3.org/TR/html5/syntax.html#optional-tags) by the HTML spec) are NOT implied. Only the [void](http://www.w3.org/TR/html5/syntax.html#void-elements) elements naturally don't need (and mustn't have) an end tag +- The HTML text is not validated in any way; tag and attribute names and the nesting of different tags are completely arbitrary. The only HTML-specific part of the parser is that it knows which tags are void elements ##Examples See `./doc/samples.lua` diff --git a/tst/init.lua b/tst/init.lua index 1349809..e3b164e 100644 --- a/tst/init.lua +++ b/tst/init.lua @@ -27,9 +27,6 @@ function test_void() else assert_equal("br", n.name, "name") assert_equal("", n:getcontent(), "content") - for _ in pairs(n.attributes) do - fail("should not have attributes") - end end end end @@ -37,18 +34,12 @@ end function test_attr() local tree = htmlparser.parse([[ - - + a5='a"5"' a6="a'6'" a7='#.[] :()' a8='|*+-=?$^%&/' + a9=a9 + a10> ]]) - assert_equal(3, #tree.nodes, "top level") - local n - for _,v in ipairs(tree.nodes) do - if v.name == "n" then n = v break end - end + assert_equal(1, #tree.nodes, "top level") + local n = tree.nodes[1] assert(tree("[a1]")[n], "a1") assert(tree("[a2]")[n], "a2") assert(tree("[a3]")[n], "a3") @@ -57,27 +48,91 @@ function test_attr() assert(tree("[a6]")[n], "a6") assert(tree("[a7]")[n], "a7") assert(tree("[a8]")[n], "a8") + assert(tree("[a9]")[n], "a9") + assert(tree("[a10]")[n], "a10") +end + +function test_attr_equal() + local tree = htmlparser.parse([[ + + ]]) + assert_equal(1, #tree.nodes, "top level") 
+ local n = tree.nodes[1] assert(tree("[a1='']")[n], "a1=''") assert(tree("[a2='']")[n], "a2=''") assert(tree("[a3='']")[n], "a3=''") assert(tree("[a4='']")[n], "a4=''") assert(tree("[a5='a\"5\"']")[n], "a5='a\"5\"'") assert(tree("[a6=\"a'6'\"]")[n], "a6=\"a'6'\"") - assert(tree("[a8='a=8']")[n], "a8='a=8'") - assert_equal(1, tree("[a10=]"):len(), "a10=") - assert_equal(1, tree("[a10='']"):len(), "a10=''") - assert_equal(2, tree("[a10!='enen']"):len(), "a10!='enen'") - assert_equal(2, tree("[a10!='']"):len(), "a10!=''") - assert_equal(3, tree("[a0!='']"):len(), "a0!=''") - assert_equal(0, tree("[a0='']"):len(), "a0=''") - assert_equal(2, tree("[a9|='en']"):len(), "a9|='en'") - assert_equal(3, tree("[a9^='en']"):len(), "a9^='en'") - assert_equal(1, tree("[a9$='en']"):len(), "a9$='en'") - assert_equal(1, tree("[a11~='two']"):len(), "a1~='two'") - assert_equal(2, tree("[a11~='three']"):len(), "a1~='three'") - assert_equal(1, tree("[a11~='four']"):len(), "a1~='four'") - assert_equal(1, tree("[a7*='7']"):len(), "a7*='7'") - assert_equal(1, tree("[a11*='f']"):len(), "a11*='f'") + -- not these characters + -- (because these have a special meaning as id, class, or attribute selector, hierarchy separator, or filter command) + -- they can occur in the HTML, but not in a selector string + -- assert(tree("[a7='#.[] :()']")[n], "a7='#.[] :()'") + assert(tree("[a8='|*+-=?$^%&/']")[n], "a8='|*+-=?$^%&/'") + assert(tree("[a9='a9']")[n], "a9='a9'") + assert(tree("[a10='']")[n], "a10=''") + assert(tree("[a10=]")[n], "a10=") +end + +function test_attr_notequal() + local tree = htmlparser.parse([[ + + + + + ]]) + assert_equal(4, #tree.nodes, "top level") + assert_equal(3, tree("[a1!='a1']"):len(), "a1!='a1'") + assert_equal(4, tree("[a1!='b1']"):len(), "a1!='b1'") + assert_equal(3, tree("[a1!='']"):len(), "a1!=''") + assert_equal(3, tree("[a1!=]"):len(), "a1!=") +end + +function test_attr_prefix_start_end() + local tree = htmlparser.parse([[ + + + + + + ]]) + assert_equal(5, 
#tree.nodes, "top level") + assert_equal(3, tree("[a1|='en']"):len(), "a1|='en'") + assert_equal(4, tree("[a1^='en']"):len(), "a1^='en'") + assert_equal(2, tree("[a1$='en']"):len(), "a1$='en'") +end + +function test_attr_word() + local tree = htmlparser.parse([[ + + + + + ]]) + assert_equal(4, #tree.nodes, "top level") + assert_equal(1, tree("[a1~='two']"):len(), "a1~='two'") + assert_equal(2, tree("[a1~='three']"):len(), "a1~='three'") + assert_equal(1, tree("[a1~='four']"):len(), "a1~='four'") +end + +function test_attr_contains() + local tree = htmlparser.parse([[ + + + + + + + ]]) + assert_equal(6, #tree.nodes, "top level") + assert_equal(2, tree("[a1*='one']"):len(), "a1*='one'") + assert_equal(2, tree("[a1*='t']"):len(), "a1*='t'") + assert_equal(1, tree("[a1*='f']"):len(), "a1*='f'") + assert_equal(5, tree("[a1*='']"):len(), "a1*=''") + assert_equal(5, tree("[a1*=]"):len(), "a1*=") end function test_descendants() @@ -104,8 +159,7 @@ function test_descendants() not ]]) - local sel = tree("parent child") - assert_equal(8, sel:len(), 'parent child') + assert_equal(8, tree("parent child"):len(), 'parent child') end function test_children() @@ -132,6 +186,5 @@ function test_children() not ]]) - local sel = tree("parent > child") - assert_equal(4, sel:len(), 'parent > child') + assert_equal(4, tree("parent > child"):len(), 'parent > child') end \ No newline at end of file