commit 089402e79adf8672faf0c243550470e1fbeadf26 Author: Wouter Scherphof Date: Thu Mar 28 05:38:48 2013 -0700 Create gh-pages branch via GitHub diff --git a/images/bg_hr.png b/images/bg_hr.png new file mode 100644 index 0000000..7973bd6 Binary files /dev/null and b/images/bg_hr.png differ diff --git a/images/blacktocat.png b/images/blacktocat.png new file mode 100644 index 0000000..6e264fe Binary files /dev/null and b/images/blacktocat.png differ diff --git a/images/icon_download.png b/images/icon_download.png new file mode 100644 index 0000000..a2a287f Binary files /dev/null and b/images/icon_download.png differ diff --git a/images/sprite_download.png b/images/sprite_download.png new file mode 100644 index 0000000..f2babd5 Binary files /dev/null and b/images/sprite_download.png differ diff --git a/index.html b/index.html new file mode 100644 index 0000000..6b78534 --- /dev/null +++ b/index.html @@ -0,0 +1,184 @@ + + + + + + + + + + + LuaRock "htmlparser" + + + + + +
+
+ View on GitHub + +

LuaRock "htmlparser"

+

Parse HTML text into a tree of elements with selectors

+ +
+ Download this project as a .zip file + Download this project as a tar.gz file +
+
+
+ + +
+
+

License

+ +

MIT; see ./doc/LICENSE

+ +

Usage

+ +

Start off with

+ +
require("luarocks.loader")
+local htmlparser = require("htmlparser")
+
+ +

Then, parse some html:

+ +
local root = htmlparser.parse(htmlstring)
+
+ +

The input to parse may be the contents of a complete html document, or any valid html snippet, as long as all tags are correctly opened and closed. +Now, find sepcific contained elements by selecting:

+ +
local elements = root:select(selectorstring)
+
+ +

Or in shorthand:

+ +
local elements = root(selectorstring)
+
+ +

This wil return a Set of elements, all of which are of the same type as the root element, and thus support selecting as well, if ever needed:

+ +
for e in pairs(elements) do
+    print(e.name)
+    local subs = e(subselectorstring)
+    for sub in pairs(subs) do
+        print("", sub.name)
+    end
+end
+
+ +

The root element is a container for the top level elements in the parsed text, i.e. the <html> element in a parsed html document would be a child of the returned root element.

+ +

Selectors

+ +

Supported selectors are a subset of jQuery's selectors:

+ +
    +
  • +"*" all contained elements
  • +
  • +"element" elements with the given tagname
  • +
  • +"#id" elements with the given id attribute value
  • +
  • +".class" elements with the given classname in the class attribute
  • +
  • +"[attribute]" elements with an attribute of the given name
  • +
  • +"[attribute='value']" equals: elements with the given value for the attribute with the given name
  • +
  • +"[attribute!='value']" not equals: elements without an attribute of the given name, or with that attribute, but with a value that is different from the given value
  • +
  • +"[attribute|='value']" prefix: attribute's value is given value, or starts with given value, followed by a hyphen (-)
  • +
  • +"[attribute*='value']" contains: attribute's value contains given value
  • +
  • +"[attribute~='value']" word: attribute's value is a space-separated token, where one of the tokens is the given value
  • +
  • +"[attribute^='value']" starts with: attribute's value starts with given value
  • +
  • +"[attribute$='value']" ends with: attribute's value ends with given value
  • +
  • +":not(selectorstring)" elements not selected by given selector string
  • +
  • +"ancestor descendant" elements selected by the descendant selector string, that are a descendant of any element selected by the ancestor selector string
  • +
  • +"parent > child" elements selected by the child selector string, that are a child element of any element selected by the parent selector string
  • +

Selectors can be combined; e.g. ".class:not([attribute]) element.class"

+ +

Limitations

+ +
    +
  • Attribute values in selectors currently cannot contain any spaces, since space is interpreted as a delimiter between the ancestor and descendant, parent and >, or > and child parts of the selector
  • +
  • Likewise, for the parent > child relation, the spaces before and after the > are mandatory
  • +
  • +<! elements are not parsed, including doctype and comments
  • +
  • Textnodes are not seperate entries in the tree, so the content of <p>line1<br />line2</p> is plainly "line1<br />line2" +
  • +

Examples

+ +

See ./doc/samples.lua

+ +

Element type

+ +

All tree elements provide, apart from :select and (), the following accessors:

+ +

Basic

+ +
    +
  • +.name the element's tagname
  • +
  • +.attributes a table with keys and values for the element's attributes; {} if none
  • +
  • +.id the value of the element's id attribute; nil if not present
  • +
  • +.classes an array with the classes listed in element's class attribute; {} if none
  • +
  • +:getcontent() the raw text between the opening and closing tags of the element; "" if none
  • +
  • +.nodes an array with the element's child elements, {} if none
  • +
  • +.parent the elements that contains this element; root.parent is nil +
  • +

Other

+ +
    +
  • +:gettext() the raw text of the complete element, starting with "<tagname" and ending with "/>" +
  • +
  • +.level how deep the element is in the tree; root level is 0 +
  • +
  • +.root the root element of the tree; root.root is root +
  • +
  • +.deepernodes a Set containing all elements in the tree beneath this element, including this element's .nodes; {} if none
  • +
  • +.deeperelements a table with a key for each distinct tagname in .deepernodes, containing a Set of all deeper element nodes with that name; {} in none
  • +
  • +.deeperattributes as .deeperelements, but keyed on attribute name
  • +
  • +.deeperids as .deeperelements, but keyed on id value
  • +
  • +.deeperclasses as .deeperelements, but keyed on class name
  • +
+
+
+ + + + + + + + diff --git a/javascripts/main.js b/javascripts/main.js new file mode 100644 index 0000000..d8135d3 --- /dev/null +++ b/javascripts/main.js @@ -0,0 +1 @@ +console.log('This would be the main JS file.'); diff --git a/params.json b/params.json new file mode 100644 index 0000000..6a71eae --- /dev/null +++ b/params.json @@ -0,0 +1 @@ +{"name":"LuaRock \"htmlparser\"","tagline":"Parse HTML text into a tree of elements with selectors","body":"[1]: http://wscherphof.github.com/lua-set/\r\n[2]: http://api.jquery.com/category/selectors/\r\n\r\n##License\r\nMIT; see `./doc/LICENSE`\r\n\r\n##Usage\r\nStart off with\r\n```lua\r\nrequire(\"luarocks.loader\")\r\nlocal htmlparser = require(\"htmlparser\")\r\n```\r\nThen, parse some html:\r\n```lua\r\nlocal root = htmlparser.parse(htmlstring)\r\n```\r\nThe input to parse may be the contents of a complete html document, or any valid html snippet, as long as all tags are correctly opened and closed.\r\nNow, find sepcific contained elements by selecting:\r\n```lua\r\nlocal elements = root:select(selectorstring)\r\n```\r\nOr in shorthand:\r\n```lua\r\nlocal elements = root(selectorstring)\r\n```\r\nThis wil return a [Set][1] of elements, all of which are of the same type as the root element, and thus support selecting as well, if ever needed:\r\n```lua\r\nfor e in pairs(elements) do\r\n\tprint(e.name)\r\n\tlocal subs = e(subselectorstring)\r\n\tfor sub in pairs(subs) do\r\n\t\tprint(\"\", sub.name)\r\n\tend\r\nend\r\n```\r\nThe root element is a container for the top level elements in the parsed text, i.e. the `` element in a parsed html document would be a child of the returned root element.\r\n\r\n##Selectors\r\nSupported selectors are a subset of [jQuery's selectors][2]:\r\n\r\n- `\"*\"` all contained elements\r\n- `\"element\"` elements with the given tagname\r\n- `\"#id\"` elements with the given id attribute value\r\n- `\".class\"` elements with the given classname in the class attribute\r\n- `\"[attribute]\"` elements with an attribute of the given name\r\n- `\"[attribute='value']\"` equals: elements with the given value for the attribute with the given name\r\n- `\"[attribute!='value']\"` not equals: elements without an attribute of the given name, or with that attribute, but with a value that is different from the given value\r\n- `\"[attribute|='value']\"` prefix: attribute's value is given value, or starts with given value, followed by a hyphen (`-`)\r\n- `\"[attribute*='value']\"` contains: attribute's value contains given value\r\n- `\"[attribute~='value']\"` word: attribute's value is a space-separated token, where one of the tokens is the given value\r\n- `\"[attribute^='value']\"` starts with: attribute's value starts with given value\r\n- `\"[attribute$='value']\"` ends with: attribute's value ends with given value\r\n- `\":not(selectorstring)\"` elements not selected by given selector string\r\n- `\"ancestor descendant\"` elements selected by the `descendant` selector string, that are a descendant of any element selected by the `ancestor` selector string\r\n- `\"parent > child\"` elements selected by the `child` selector string, that are a child element of any element selected by the `parent` selector string\r\n\r\nSelectors can be combined; e.g. `\".class:not([attribute]) element.class\"`\r\n\r\n###Limitations\r\n- Attribute values in selectors currently cannot contain any spaces, since space is interpreted as a delimiter between the `ancestor` and `descendant`, `parent` and `>`, or `>` and `child` parts of the selector\r\n- Likewise, for the `parent > child` relation, the spaces before and after the `>` are mandatory\r\n- `line1
line2

` is plainly `\"line1
line2\"`\r\n\r\n##Examples\r\nSee `./doc/samples.lua`\r\n\r\n##Element type\r\nAll tree elements provide, apart from `:select` and `()`, the following accessors:\r\n\r\n###Basic\r\n- `.name` the element's tagname\r\n- `.attributes` a table with keys and values for the element's attributes; `{}` if none\r\n- `.id` the value of the element's id attribute; `nil` if not present\r\n- `.classes` an array with the classes listed in element's class attribute; `{}` if none\r\n- `:getcontent()` the raw text between the opening and closing tags of the element; `\"\"` if none\r\n- `.nodes` an array with the element's child elements, `{}` if none\r\n- `.parent` the elements that contains this element; `root.parent` is `nil`\r\n\r\n###Other\r\n- `:gettext()` the raw text of the complete element, starting with `\"\"`\r\n- `.level` how deep the element is in the tree; root level is `0`\r\n- `.root` the root element of the tree; `root.root` is `root`\r\n- `.deepernodes` a [Set][1] containing all elements in the tree beneath this element, including this element's `.nodes`; `{}` if none\r\n- `.deeperelements` a table with a key for each distinct tagname in `.deepernodes`, containing a [Set][1] of all deeper element nodes with that name; `{}` in none\r\n- `.deeperattributes` as `.deeperelements`, but keyed on attribute name\r\n- `.deeperids` as `.deeperelements`, but keyed on id value\r\n- `.deeperclasses` as `.deeperelements`, but keyed on class name\r\n","google":"","note":"Don't delete this file! It's used internally to help with page regeneration."} \ No newline at end of file diff --git a/stylesheets/pygment_trac.css b/stylesheets/pygment_trac.css new file mode 100644 index 0000000..e65cedf --- /dev/null +++ b/stylesheets/pygment_trac.css @@ -0,0 +1,70 @@ +.highlight .hll { background-color: #ffffcc } +.highlight { background: #f0f3f3; } +.highlight .c { color: #0099FF; font-style: italic } /* Comment */ +.highlight .err { color: #AA0000; background-color: #FFAAAA } /* Error */ +.highlight .k { color: #006699; font-weight: bold } /* Keyword */ +.highlight .o { color: #555555 } /* Operator */ +.highlight .cm { color: #0099FF; font-style: italic } /* Comment.Multiline */ +.highlight .cp { color: #009999 } /* Comment.Preproc */ +.highlight .c1 { color: #0099FF; font-style: italic } /* Comment.Single */ +.highlight .cs { color: #0099FF; font-weight: bold; font-style: italic } /* Comment.Special */ +.highlight .gd { background-color: #FFCCCC; border: 1px solid #CC0000 } /* Generic.Deleted */ +.highlight .ge { font-style: italic } /* Generic.Emph */ +.highlight .gr { color: #FF0000 } /* Generic.Error */ +.highlight .gh { color: #003300; font-weight: bold } /* Generic.Heading */ +.highlight .gi { background-color: #CCFFCC; border: 1px solid #00CC00 } /* Generic.Inserted */ +.highlight .go { color: #AAAAAA } /* Generic.Output */ +.highlight .gp { color: #000099; font-weight: bold } /* Generic.Prompt */ +.highlight .gs { font-weight: bold } /* Generic.Strong */ +.highlight .gu { color: #003300; font-weight: bold } /* Generic.Subheading */ +.highlight .gt { color: #99CC66 } /* Generic.Traceback */ +.highlight .kc { color: #006699; font-weight: bold } /* Keyword.Constant */ +.highlight .kd { color: #006699; font-weight: bold } /* Keyword.Declaration */ +.highlight .kn { color: #006699; font-weight: bold } /* Keyword.Namespace */ +.highlight .kp { color: #006699 } /* Keyword.Pseudo */ +.highlight .kr { color: #006699; font-weight: bold } /* Keyword.Reserved */ +.highlight .kt { color: #007788; font-weight: bold } /* Keyword.Type */ +.highlight .m { color: #FF6600 } /* Literal.Number */ +.highlight .s { color: #CC3300 } /* Literal.String */ +.highlight .na { color: #330099 } /* Name.Attribute */ +.highlight .nb { color: #336666 } /* Name.Builtin */ +.highlight .nc { color: #00AA88; font-weight: bold } /* Name.Class */ +.highlight .no { color: #336600 } /* Name.Constant */ +.highlight .nd { color: #9999FF } /* Name.Decorator */ +.highlight .ni { color: #999999; font-weight: bold } /* Name.Entity */ +.highlight .ne { color: #CC0000; font-weight: bold } /* Name.Exception */ +.highlight .nf { color: #CC00FF } /* Name.Function */ +.highlight .nl { color: #9999FF } /* Name.Label */ +.highlight .nn { color: #00CCFF; font-weight: bold } /* Name.Namespace */ +.highlight .nt { color: #330099; font-weight: bold } /* Name.Tag */ +.highlight .nv { color: #003333 } /* Name.Variable */ +.highlight .ow { color: #000000; font-weight: bold } /* Operator.Word */ +.highlight .w { color: #bbbbbb } /* Text.Whitespace */ +.highlight .mf { color: #FF6600 } /* Literal.Number.Float */ +.highlight .mh { color: #FF6600 } /* Literal.Number.Hex */ +.highlight .mi { color: #FF6600 } /* Literal.Number.Integer */ +.highlight .mo { color: #FF6600 } /* Literal.Number.Oct */ +.highlight .sb { color: #CC3300 } /* Literal.String.Backtick */ +.highlight .sc { color: #CC3300 } /* Literal.String.Char */ +.highlight .sd { color: #CC3300; font-style: italic } /* Literal.String.Doc */ +.highlight .s2 { color: #CC3300 } /* Literal.String.Double */ +.highlight .se { color: #CC3300; font-weight: bold } /* Literal.String.Escape */ +.highlight .sh { color: #CC3300 } /* Literal.String.Heredoc */ +.highlight .si { color: #AA0000 } /* Literal.String.Interpol */ +.highlight .sx { color: #CC3300 } /* Literal.String.Other */ +.highlight .sr { color: #33AAAA } /* Literal.String.Regex */ +.highlight .s1 { color: #CC3300 } /* Literal.String.Single */ +.highlight .ss { color: #FFCC33 } /* Literal.String.Symbol */ +.highlight .bp { color: #336666 } /* Name.Builtin.Pseudo */ +.highlight .vc { color: #003333 } /* Name.Variable.Class */ +.highlight .vg { color: #003333 } /* Name.Variable.Global */ +.highlight .vi { color: #003333 } /* Name.Variable.Instance */ +.highlight .il { color: #FF6600 } /* Literal.Number.Integer.Long */ + +.type-csharp .highlight .k { color: #0000FF } +.type-csharp .highlight .kt { color: #0000FF } +.type-csharp .highlight .nf { color: #000000; font-weight: normal } +.type-csharp .highlight .nc { color: #2B91AF } +.type-csharp .highlight .nn { color: #000000 } +.type-csharp .highlight .s { color: #A31515 } +.type-csharp .highlight .sc { color: #A31515 } diff --git a/stylesheets/stylesheet.css b/stylesheets/stylesheet.css new file mode 100644 index 0000000..2bd468a --- /dev/null +++ b/stylesheets/stylesheet.css @@ -0,0 +1,431 @@ +/******************************************************************************* +Slate Theme for GitHub Pages +by Jason Costello, @jsncostello +*******************************************************************************/ + +@import url(pygment_trac.css); + +/******************************************************************************* +MeyerWeb Reset +*******************************************************************************/ + +html, body, div, span, applet, object, iframe, +h1, h2, h3, h4, h5, h6, p, blockquote, pre, +a, abbr, acronym, address, big, cite, code, +del, dfn, em, img, ins, kbd, q, s, samp, +small, strike, strong, sub, sup, tt, var, +b, u, i, center, +dl, dt, dd, ol, ul, li, +fieldset, form, label, legend, +table, caption, tbody, tfoot, thead, tr, th, td, +article, aside, canvas, details, embed, +figure, figcaption, footer, header, hgroup, +menu, nav, output, ruby, section, summary, +time, mark, audio, video { + margin: 0; + padding: 0; + border: 0; + font: inherit; + vertical-align: baseline; +} + +/* HTML5 display-role reset for older browsers */ +article, aside, details, figcaption, figure, +footer, header, hgroup, menu, nav, section { + display: block; +} + +ol, ul { + list-style: none; +} + +blockquote, q { +} + +table { + border-collapse: collapse; + border-spacing: 0; +} + +a:focus { + outline: none; +} + +/******************************************************************************* +Theme Styles +*******************************************************************************/ + +body { + box-sizing: border-box; + color:#373737; + background: #212121; + font-size: 16px; + font-family: 'Myriad Pro', Calibri, Helvetica, Arial, sans-serif; + line-height: 1.5; + -webkit-font-smoothing: antialiased; +} + +h1, h2, h3, h4, h5, h6 { + margin: 10px 0; + font-weight: 700; + color:#222222; + font-family: 'Lucida Grande', 'Calibri', Helvetica, Arial, sans-serif; + letter-spacing: -1px; +} + +h1 { + font-size: 36px; + font-weight: 700; +} + +h2 { + padding-bottom: 10px; + font-size: 32px; + background: url('../images/bg_hr.png') repeat-x bottom; +} + +h3 { + font-size: 24px; +} + +h4 { + font-size: 21px; +} + +h5 { + font-size: 18px; +} + +h6 { + font-size: 16px; +} + +p { + margin: 10px 0 15px 0; +} + +footer p { + color: #f2f2f2; +} + +a { + text-decoration: none; + color: #007edf; + text-shadow: none; + + transition: color 0.5s ease; + transition: text-shadow 0.5s ease; + -webkit-transition: color 0.5s ease; + -webkit-transition: text-shadow 0.5s ease; + -moz-transition: color 0.5s ease; + -moz-transition: text-shadow 0.5s ease; + -o-transition: color 0.5s ease; + -o-transition: text-shadow 0.5s ease; + -ms-transition: color 0.5s ease; + -ms-transition: text-shadow 0.5s ease; +} + +#main_content a:hover { + color: #0069ba; + text-shadow: #0090ff 0px 0px 2px; +} + +footer a:hover { + color: #43adff; + text-shadow: #0090ff 0px 0px 2px; +} + +em { + font-style: italic; +} + +strong { + font-weight: bold; +} + +img { + position: relative; + margin: 0 auto; + max-width: 739px; + padding: 5px; + margin: 10px 0 10px 0; + border: 1px solid #ebebeb; + + box-shadow: 0 0 5px #ebebeb; + -webkit-box-shadow: 0 0 5px #ebebeb; + -moz-box-shadow: 0 0 5px #ebebeb; + -o-box-shadow: 0 0 5px #ebebeb; + -ms-box-shadow: 0 0 5px #ebebeb; +} + +pre, code { + width: 100%; + color: #222; + background-color: #fff; + + font-family: Monaco, "Bitstream Vera Sans Mono", "Lucida Console", Terminal, monospace; + font-size: 14px; + + border-radius: 2px; + -moz-border-radius: 2px; + -webkit-border-radius: 2px; + + + +} + +pre { + width: 100%; + padding: 10px; + box-shadow: 0 0 10px rgba(0,0,0,.1); + overflow: auto; +} + +code { + padding: 3px; + margin: 0 3px; + box-shadow: 0 0 10px rgba(0,0,0,.1); +} + +pre code { + display: block; + box-shadow: none; +} + +blockquote { + color: #666; + margin-bottom: 20px; + padding: 0 0 0 20px; + border-left: 3px solid #bbb; +} + +ul, ol, dl { + margin-bottom: 15px +} + +ul li { + list-style: inside; + padding-left: 20px; +} + +ol li { + list-style: decimal inside; + padding-left: 20px; +} + +dl dt { + font-weight: bold; +} + +dl dd { + padding-left: 20px; + font-style: italic; +} + +dl p { + padding-left: 20px; + font-style: italic; +} + +hr { + height: 1px; + margin-bottom: 5px; + border: none; + background: url('../images/bg_hr.png') repeat-x center; +} + +table { + border: 1px solid #373737; + margin-bottom: 20px; + text-align: left; + } + +th { + font-family: 'Lucida Grande', 'Helvetica Neue', Helvetica, Arial, sans-serif; + padding: 10px; + background: #373737; + color: #fff; + } + +td { + padding: 10px; + border: 1px solid #373737; + } + +form { + background: #f2f2f2; + padding: 20px; +} + +img { + width: 100%; + max-width: 100%; +} + +/******************************************************************************* +Full-Width Styles +*******************************************************************************/ + +.outer { + width: 100%; +} + +.inner { + position: relative; + max-width: 640px; + padding: 20px 10px; + margin: 0 auto; +} + +#forkme_banner { + display: block; + position: absolute; + top:0; + right: 10px; + z-index: 10; + padding: 10px 50px 10px 10px; + color: #fff; + background: url('../images/blacktocat.png') #0090ff no-repeat 95% 50%; + font-weight: 700; + box-shadow: 0 0 10px rgba(0,0,0,.5); + border-bottom-left-radius: 2px; + border-bottom-right-radius: 2px; +} + +#header_wrap { + background: #212121; + background: -moz-linear-gradient(top, #373737, #212121); + background: -webkit-linear-gradient(top, #373737, #212121); + background: -ms-linear-gradient(top, #373737, #212121); + background: -o-linear-gradient(top, #373737, #212121); + background: linear-gradient(top, #373737, #212121); +} + +#header_wrap .inner { + padding: 50px 10px 30px 10px; +} + +#project_title { + margin: 0; + color: #fff; + font-size: 42px; + font-weight: 700; + text-shadow: #111 0px 0px 10px; +} + +#project_tagline { + color: #fff; + font-size: 24px; + font-weight: 300; + background: none; + text-shadow: #111 0px 0px 10px; +} + +#downloads { + position: absolute; + width: 210px; + z-index: 10; + bottom: -40px; + right: 0; + height: 70px; + background: url('../images/icon_download.png') no-repeat 0% 90%; +} + +.zip_download_link { + display: block; + float: right; + width: 90px; + height:70px; + text-indent: -5000px; + overflow: hidden; + background: url(../images/sprite_download.png) no-repeat bottom left; +} + +.tar_download_link { + display: block; + float: right; + width: 90px; + height:70px; + text-indent: -5000px; + overflow: hidden; + background: url(../images/sprite_download.png) no-repeat bottom right; + margin-left: 10px; +} + +.zip_download_link:hover { + background: url(../images/sprite_download.png) no-repeat top left; +} + +.tar_download_link:hover { + background: url(../images/sprite_download.png) no-repeat top right; +} + +#main_content_wrap { + background: #f2f2f2; + border-top: 1px solid #111; + border-bottom: 1px solid #111; +} + +#main_content { + padding-top: 40px; +} + +#footer_wrap { + background: #212121; +} + + + +/******************************************************************************* +Small Device Styles +*******************************************************************************/ + +@media screen and (max-width: 480px) { + body { + font-size:14px; + } + + #downloads { + display: none; + } + + .inner { + min-width: 320px; + max-width: 480px; + } + + #project_title { + font-size: 32px; + } + + h1 { + font-size: 28px; + } + + h2 { + font-size: 24px; + } + + h3 { + font-size: 21px; + } + + h4 { + font-size: 18px; + } + + h5 { + font-size: 14px; + } + + h6 { + font-size: 12px; + } + + code, pre { + min-width: 320px; + max-width: 480px; + font-size: 11px; + } + +}