Parse valueless attributes (e.g. itemscope)

Also allow nested tags of the same name, fixing a newly discovered bug in which an inner tag replaced the outer one in the table of open tags.
This commit is contained in:
Wouter Scherphof 2013-03-21 14:03:17 +01:00
parent 67be34285e
commit 82dbddfd19
2 changed files with 32 additions and 11 deletions

View File

@ -16,14 +16,18 @@ local function parse(text)
local tagst, apos = tag:gettext(), 1 local tagst, apos = tag:gettext(), 1
while true do while true do
local start, k, quote, v local start, k, eq, quote, v
start, apos, k, quote = string.find(tagst, "%s+([^%s=]+)=(['\"]?)", apos) start, apos, k, eq, quote = string.find(tagst, "%s+([^%s=]+)(=?)(['\"]?)", apos)
if not k then break end if not k then break end
if eq == "" then
v = ""
else
local pattern = "=([^%s'\">]*)" local pattern = "=([^%s'\">]*)"
if quote ~= '' then if quote ~= '' then
pattern = quote .. "([^" .. quote .. "]*)" .. quote pattern = quote .. "([^" .. quote .. "]*)" .. quote
end end
start, apos, v = string.find(tagst, pattern, apos) start, apos, v = string.find(tagst, pattern, apos)
end
tag:addattribute(k, v) tag:addattribute(k, v)
end end
@ -31,17 +35,16 @@ local function parse(text)
descend = false descend = false
tag:close() tag:close()
else else
opentags[tag.name] = tag opentags[tag.name] = opentags[tag.name] or {}
table.insert(opentags[tag.name], tag)
end end
local closeend = tpos local closeend = tpos
while true do while true do
local closestart, closing, closename local closestart, closing, closename
closestart, closeend, closing, closename = string.find(root._text, "[^<]*<(/?)(%w+)", closeend) closestart, closeend, closing, closename = string.find(root._text, "[^<]*<(/?)(%w+)", closeend)
closing = closing and closing ~= '' if not closing or closing == "" then break end
if not closing then break end tag = table.remove(opentags[closename])
tag = opentags[closename]
opentags[closename] = nil
closestart = string.find(root._text, "<", closestart) closestart = string.find(root._text, "<", closestart)
tag:close(closestart, closeend + 1) tag:close(closestart, closeend + 1)
node = tag.parent node = tag.parent

View File

@ -39,5 +39,23 @@
</a> </a>
</li> </li>
</ul> </ul>
<section itemscope itemtype="http://schema.org/Person">
Hello, my name is
<span itemprop="name">John Doe</span>,
I am a
<span itemprop="jobTitle">graduate research assistant</span>
at the
<span itemprop="affiliation">University of Dreams</span>.
My friends call me
<span itemprop="additionalName">Johnny</span>.
You can visit my homepage at
<a href="http://www.JohnnyD.com" itemprop="url">www.JohnnyD.com</a>.
<section itemprop="address" itemscope itemtype="http://schema.org/PostalAddress">
I live at
<span itemprop="streetAddress">1234 Peach Drive</span>,
<span itemprop="addressLocality">Warner Robins</span>,
<span itemprop="addressRegion">Georgia</span>.
</section>
</section>
</body> </body>
</html> </html>