diff --git a/src/htmlparser.lua b/src/htmlparser.lua index 209332c..1f05ef9 100644 --- a/src/htmlparser.lua +++ b/src/htmlparser.lua @@ -23,21 +23,19 @@ local function parse(text) local start, k, eq, quote, v start, apos, k, eq, quote = string.find(tagst, "%s+" .. -- some uncaptured space - "([^%s=]+)" .. -- k = an unspaced string up to an optional "=" + "([^%s=/>]+)" .. -- k = an unspaced string up to an optional "=" or the "/" or ">" "(=?)" .. -- eq = the optional; "=", else "" "(['\"]?)", -- quote = an optional "'" or '"' following the "=", or "" apos) if not k or k == "/>" or k == ">" then break end - if eq == "" then - v = "" - else + if eq == "=" then local pattern = "=([^%s>]*)" - if quote ~= '' then + if quote ~= "" then pattern = quote .. "([^" .. quote .. "]*)" .. quote end start, apos, v = string.find(tagst, pattern, apos) end - tag:addattribute(k, v) + tag:addattribute(k, v or "") end if voidelements[string.lower(tag.name)] then diff --git a/src/htmlparser/ElementNode.lua b/src/htmlparser/ElementNode.lua index d6c99e8..e58454c 100644 --- a/src/htmlparser/ElementNode.lua +++ b/src/htmlparser/ElementNode.lua @@ -51,9 +51,8 @@ function ElementNode:addattribute(k, v) self.attributes[k] = v if string.lower(k) == "id" then self.id = v - end -- class attribute contains "space-separated tokens", each of which we'd like quick access to - if string.lower(k) == "class" then + elseif string.lower(k) == "class" then for class in string.gmatch(v, "%S+") do table.insert(self.classes, class) end @@ -98,18 +97,20 @@ local function select(self, s) local sets = {[""] = self.deeperelements, ["["] = self.deeperattributes, ["#"] = self.deeperids, ["."] = self.deeperclasses} local function match(t, w) - local m, v - if t == "[" then w, m, v = string.match(w, + local m, e, v + if t == "[" then w, m, e, v = string.match(w, "([^=|%*~%$!%^]+)" .. -- w = 1 or more characters up to a possible "=", "|", "*", "~", "$", "!", or "^" "([|%*~%$!%^]?)" .. -- m = an optional "|", "*", "~", "$", "!", or "^", preceding the optional "=" - "=?" .. -- an optional uncaptured "=" + "(=?)" .. -- e = the optional "=" "(.*)" -- v = anything following the "=", or else "" ) end local matched = Set:new(sets[t][w]) -- attribute value selectors - if v and v ~= "" then + if e == "=" then + if #v < 2 then v = "'" .. v .. "'" end -- values should be quoted v = string.sub(v, 2, #v - 1) -- strip quotes + if m == "!" then matched = Set:new(self.deepernodes) end -- include those without that attribute for node in pairs(matched) do local a = node.attributes[w] -- equals diff --git a/tst/init.lua b/tst/init.lua index 8f1a53f..1349809 100644 --- a/tst/init.lua +++ b/tst/init.lua @@ -6,10 +6,9 @@ local lunitx = require("lunitx") module("html", lunitx.testcase, package.seeall) local htmlparser = require("htmlparser") -local tree, sel function test_void() - tree = htmlparser.parse([[ + local tree = htmlparser.parse([[
@@ -27,14 +26,62 @@ function test_void()
assert_equal(4, #n.nodes, "deeper level")
else
assert_equal("br", n.name, "name")
- assert_equal(0, #n.attributes, "attributes")
assert_equal("", n:getcontent(), "content")
+ for _ in pairs(n.attributes) do
+ fail("should not have attributes")
+ end
end
end
end
+function test_attr()
+ local tree = htmlparser.parse([[
+