mirror of
				https://github.com/TangentFoxy/lua-htmlparser.git
				synced 2025-10-25 12:55:00 +00:00 
			
		
		
		
	| @@ -87,33 +87,57 @@ function ElementNode:close(closestart, closeend) | ||||
|   end | ||||
| end | ||||
|  | ||||
--- Escape every Lua pattern magic character in `s` so the result can be
-- embedded in a pattern and match `s` literally.
-- The magic characters are: ^ $ ( ) % . [ ] * + - ?
-- @param s string to escape
-- @return the escaped string (a single value)
local function escape(s)
  -- One gsub pass replaces the per-character loop with repeated `..`
  -- concatenation (quadratic on long inputs) and avoids rebuilding a lookup
  -- table on every call. "%%%0" is a literal "%" followed by the matched
  -- character. The outer parentheses drop gsub's second result (the
  -- substitution count) so callers still receive exactly one value.
  return (string.gsub(s, "[%^%$%(%)%%%.%[%]%*%+%-%?]", "%%%0"))
end
|  | ||||
| local function select(self, s) | ||||
|   if not s or type(s) ~= "string" or s == "" then return Set:new() end | ||||
|  | ||||
|   local sets = {[""]  = self.deeperelements, ["["] = self.deeperattributes, | ||||
|                 ["#"] = self.deeperids,      ["."] = self.deeperclasses} | ||||
|   local function match(t, w) | ||||
|     local sets = { | ||||
|       [""]  = self.deeperelements, | ||||
|       ["["] = self.deeperattributes, | ||||
|       ["#"] = self.deeperids, | ||||
|       ["."] = self.deeperclasses | ||||
|     } | ||||
|     local v | ||||
|     if t == "[" then | ||||
|       w, v = string.match(w,  | ||||
|         "([^=]+)" .. -- w = 1 or more characters up to a possible "=" | ||||
|         "=?" ..      -- an optional uncaptured "=" | ||||
|         "(.*)"      -- v = anything following the "=", or else "" | ||||
|     local m, v | ||||
|     if t == "[" then w, m, v = string.match(w,  | ||||
|         "([^=|%*~%$!%^]+)" .. -- w = 1 or more characters up to a possible "=", "|", "*", "~", "$", "!", or "^" | ||||
|         "([|%*~%$!%^]?)" ..   -- m = an optional "|", "*", "~", "$", "!", or "^", preceding the optional "=" | ||||
|         "=?" ..               -- an optional uncaptured "=" | ||||
|         "(.*)"                -- v = anything following the "=", or else "" | ||||
|       ) | ||||
|     end | ||||
|     local matched = sets[t][w] | ||||
|     local matched = Set:new(sets[t][w]) | ||||
|     -- attribute value selectors | ||||
|     if v and v ~= "" then | ||||
|       v = string.sub(v, 2, #v - 1) -- strip quotes | ||||
|       for node in pairs(matched) do | ||||
|         if node.attributes[w] ~= v then | ||||
|           matched:remove(node) | ||||
|         local a = node.attributes[w] | ||||
|         -- equals | ||||
|         if m == "" and a ~= v then matched:remove(node) | ||||
|         -- not equals | ||||
|         elseif m == "!" and a == v then matched:remove(node) | ||||
|         -- prefix | ||||
|         elseif m =="|" and string.match(a, "^[^-]*") ~= v then matched:remove(node) | ||||
|         -- contains | ||||
|         elseif m =="*" and string.match(a, escape(v)) ~= v then matched:remove(node) | ||||
|         -- word | ||||
|         elseif m =="~" then matched:remove(node) | ||||
|           for word in string.gmatch(a, "%S+") do | ||||
|             if word == v then matched:add(node) break end | ||||
|           end | ||||
|         -- starts with | ||||
|         elseif m =="^" and string.match(a, "^" .. escape(v)) ~= v then matched:remove(node) | ||||
|         -- ends with | ||||
|         elseif m =="$" and string.match(a, escape(v) .. "$") ~= v then matched:remove(node) | ||||
|         end | ||||
|       end | ||||
|     end | ||||
|       end -- for node | ||||
|     end -- if v | ||||
|     return matched | ||||
|   end | ||||
|  | ||||
| @@ -123,8 +147,7 @@ local function select(self, s) | ||||
|     resultset = Set:new() | ||||
|     for subject in pairs(subjects) do | ||||
|       local star = subject.deepernodes | ||||
|       if childrenonly then star = Set:new(subject.nodes) end | ||||
|       childrenonly = false | ||||
|       if childrenonly then star = Set:new(subject.nodes) childrenonly = false end | ||||
|       resultset = resultset + star | ||||
|     end | ||||
|     if part == "*" then goto nextpart end | ||||
|   | ||||
							
								
								
									
										12
									
								
								test.html
									
									
									
									
									
								
							
							
						
						
									
										12
									
								
								test.html
									
									
									
									
									
								
							| @@ -1,11 +1,11 @@ | ||||
| <!DOCTYPE html> | ||||
| <html lang="en" test1='val1' test2="val='2'" test3='val="3"' test4="val = 4" test5=val5 test6=val""6> | ||||
| <head> | ||||
| <head words="testing one two three"> | ||||
|   <meta charset="utf-8" /> | ||||
|   <link rel="stylesheet" href="test.css" /> | ||||
|   <link rel="alternate" title="Feed" type="application/atom+xml" href="#" /> | ||||
|   <link rel="stylesheet" href="test.css" hreflang="en" /> | ||||
|   <link rel="alternate" title="Feed" type="application/atom+xml" href="#" hreflang="en-gb" /> | ||||
| </head> | ||||
| <body> | ||||
| <body words="testing three four five"> | ||||
|   <h1>Contents</h1> | ||||
|   <ol class="chapters"> | ||||
|     <li>Preface</li> | ||||
| @@ -27,13 +27,13 @@ | ||||
|   </p> | ||||
|   <ul class="contacts"> | ||||
|     <li id="/contacts/4711"> | ||||
|       <a href="/contacts/4711"> | ||||
|       <a href="/contacts/4711" hreflang="en-us"> | ||||
|         <span class="firstname">Jon</span> | ||||
|         <span class="lastname">Moore</span> | ||||
|       </a> | ||||
|     </li> | ||||
|     <li id="/contacts/4712"> | ||||
|       <a href="/contacts/4712"> | ||||
|       <a href="/contacts/4712" hreflang="english"> | ||||
|         <span class="firstname">Homer</span> | ||||
|         <span class="lastname">Simpson</span> | ||||
|       </a> | ||||
|   | ||||
							
								
								
									
										11
									
								
								test.lua
									
									
									
									
									
								
							
							
						
						
									
										11
									
								
								test.lua
									
									
									
									
									
								
							| @@ -60,6 +60,17 @@ select("[itemscope='']") | ||||
| select("[itemscope=]") | ||||
| select("[itemscope]") | ||||
|  | ||||
| select("[itemscope][itemprop='address']") | ||||
| select("[itemscope][itemprop!='address']") | ||||
| select("[itemscope][itemprop!='adres']") | ||||
| select("[itemscope][itemprop!='']") | ||||
| select("[hreflang|='en']") | ||||
| select("[itemprop*='address']") | ||||
| select("[words~='two']") | ||||
| select("[words~='three']") | ||||
| select("[itemprop$='ion']") | ||||
| select("[hreflang^='en']") | ||||
|  | ||||
| print("\nchapters") | ||||
| local sel, chapters = root("ol.chapters > li"), {} | ||||
| for e in pairs(sel) do | ||||
|   | ||||
		Reference in New Issue
	
	Block a user