From a15ea45b822a213dec8c41fb55db0b4939edd7a1 Mon Sep 17 00:00:00 2001 From: Niklas Frykholm Date: Sat, 31 May 2008 12:00:00 +0300 Subject: [PATCH] Import markdown from http://frykholm.se wget http://frykholm.se/files/markdown.lua wget http://frykholm.se/files/markdown-tests.lua This is the version available on LuaRocks as markdown 0.32-2. LDoc bundles a patched version compatible with Lua 5.2+. --- markdown-tests.lua | 4478 ++++++++++++++++++++++++++++++++++++++++++++ markdown.lua | 1359 ++++++++++++++ 2 files changed, 5837 insertions(+) create mode 100644 markdown-tests.lua create mode 100644 markdown.lua diff --git a/markdown-tests.lua b/markdown-tests.lua new file mode 100644 index 0000000..00b0ad2 --- /dev/null +++ b/markdown-tests.lua @@ -0,0 +1,4478 @@ +if not markdown then + require "markdown" +end + +-- Splits the text into an array of separate lines. +local function split(text, sep) + sep = sep or "\n" + local lines = {} + local pos = 1 + while true do + local b,e = text:find(sep, pos) + if not b then table.insert(lines, text:sub(pos)) break end + table.insert(lines, text:sub(pos, b-1)) + pos = e + 1 + end + return lines +end + +---------------------------------------------------------------------- +-- Unit tests +---------------------------------------------------------------------- + +-- Unit/regression tests for the markdown() function. Each test is specified +-- as a sequence of input/expected output pairs, separated by the ~ character. + +-- Set up a table to store all the tests. Customize __newindex to record the +-- order in which the tests are created so that we can process them in the right +-- order. +local TESTS = {} +local TESTS_MT = { + __index = _G, + __newindex = function(t,k,v) + if k:sub(1,1) == "_" then rawset(t,k,v) return end + + if t._indices == nil then t._indices = {} end + table.insert(t._indices, k) + rawset(t,k,v) + end +} + +setmetatable(TESTS, TESTS_MT) +setfenv(1, TESTS) + +-- Test header markers +headers = [[ +This is an h1 +============ + +This is an h2 +--------------- +~ +

This is an h1

+ +

This is an h2

+~ +# This is an h1 + +## This is an h2 + +###### This is an h6 + +# This is an h1 # + +## This is an h2 ## + +### This is an h3 ###### +~ +

This is an h1

+ +

This is an h2

+ +
This is an h6
+ +

This is an h1

+ +

This is an h2

+ +

This is an h3

+~ +# A header with *italics* in it +~ +

A header with italics in it

+~ +# A header with a [link][1] in it. + + [1]: http://google.com/ +~ +

A header with a link in it.

+]] + +-- Test blockquotes +blockquotes = [[ +> This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, +> consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus. +> Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus. +> +> Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse +> id sem consectetuer libero luctus adipiscing. +~ +
+

This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, + consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus. + Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.

+ +

Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse + id sem consectetuer libero luctus adipiscing.

+
+~ +> This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, +consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus. +Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus. + +> Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse +id sem consectetuer libero luctus adipiscing. +~ +
+

This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, + consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus. + Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.

+ +

Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse + id sem consectetuer libero luctus adipiscing.

+
+~ +> This is the first level of quoting. +> +> > This is nested blockquote. +> +> Back to the first level. +~ +
+

This is the first level of quoting.

+ +
+

Thisi s nested blockquote.

+
+ +

Back to the first level.

+
+~ +> ## This is a header. +> +> 1. This is the first list item. +> 2. This is the second list item. +> +> Here's some example code: +> +> return shell_exec("echo $input | $markdown_script"); +~ +
+

This is a header.

+ +
    +
  1. This is the first list item.
  2. +
  3. This is the second list item.
  4. +
+ +

Here's some example code:

+ +
return shell_exec("echo $input | $markdown_script");
+
+
+]] + +-- Test lists +lists = [[ +* Red +* Green +* Blue +~ + +~ ++ Red ++ Green ++ Blue +~ + +~ +- Red +- Green +- Blue +~ + +~ +1. Bird +2. McHale +3. Parish +~ +
    +
  1. Bird
  2. +
  3. McHale
  4. +
  5. Parish
  6. +
+~ +1. Bird +1. McHale +1. Parish +~ +
    +
  1. Bird
  2. +
  3. McHale
  4. +
  5. Parish
  6. +
+~ +3. Bird +1. McHale +8. Parish +~ +
    +
  1. Bird
  2. +
  3. McHale
  4. +
  5. Parish
  6. +
+~ +* Lorem ipsum dolor sit amet, consectetuer adipiscing elit. + Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi, + viverra nec, fringilla in, laoreet vitae, risus. +* Donec sit amet nisl. Aliquam semper ipsum sit amet velit. + Suspendisse id sem consectetuer libero luctus adipiscing. +~ + +~ +* Lorem ipsum dolor sit amet, consectetuer adipiscing elit. +Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi, +viverra nec, fringilla in, laoreet vitae, risus. +* Donec sit amet nisl. Aliquam semper ipsum sit amet velit. +Suspendisse id sem consectetuer libero luctus adipiscing. +~ + +~ +* Bird + +* Magic +~ + +~ +1. This is a list item with two paragraphs. Lorem ipsum dolor + sit amet, consectetuer adipiscing elit. Aliquam hendrerit + mi posuere lectus. + + Vestibulum enim wisi, viverra nec, fringilla in, laoreet + vitae, risus. Donec sit amet nisl. Aliquam semper ipsum + sit amet velit. + +2. Suspendisse id sem consectetuer libero luctus adipiscing. +~ +
    +
  1. This is a list item with two paragraphs. Lorem ipsum dolor + sit amet, consectetuer adipiscing elit. Aliquam hendrerit + mi posuere lectus.

    + +

    Vestibulum enim wisi, viverra nec, fringilla in, laoreet + vitae, risus. Donec sit amet nisl. Aliquam semper ipsum + sit amet velit.

  2. + +
  3. Suspendisse id sem consectetuer libero luctus adipiscing.

  4. +
+~ +* This is a list item with two paragraphs. + + This is the second paragraph in the list item. You're +only required to indent the first line. Lorem ipsum dolor +sit amet, consectetuer adipiscing elit. + +* Another item in the same list. +~ + +~ +* A list item with a blockquote: + + > This is a blockquote + > inside a list item. +~ + +~ +* A list item with a code block: + + +~ + +~ +1986. What a great season. +~ +
    +
  1. What a great season.
  2. +
+~ +1986\. What a great season. +~ +

1986. What a great season.

+]] + +-- Test code blocks +code_blocks = [[ +This is a normal paragraph: + + This is a code block. +~ +

This is a normal paragraph:

+ +
This is a code block.
+
+~ +Here is an example of AppleScript: + + tell application "Foo" + beep + end tell +~ +

Here is an example of AppleScript:

+ +
tell application "Foo"
+    beep
+end tell
+
+~ + +~ +
<div class="footer">
+    &copy; 2004 Foo Corporation
+</div>
+
+~ + A code block with *asterisks*. + And _underlines_. +~ +
A code block with *asterisks*.
+And _underlines_.
+
+]] + +-- Test rules +rules = [[ +* * * + +*** + +***** + +- - - + +--------------------------------------- +~ +
+ +
+ +
+ +
+ +
+]] + +-- Test links +links = [[ +This is [an example](http://example.com/ "Title") inline link. +~ +

This is an example inline link.

+~ +[This link](http://example.net/) has no title attribute. +~ +

This link has no title attribute.

+~ +See my [About](/about/) page for details. +~ +

See my About page for details.

+~ +This is [an example][id] reference-style link. + +[id]: http://example.com/ "Optional Title Here" + +Other text. +~ +

This is an example reference-style link.

+ +

Other text.

+~ +This is [an example] [id] reference-style link. +[id]: http://example.com/ +~ +

This is an example reference-style link.

+~ +This is [an example][id] reference-style link. +[id]: http://example.com/ 'Optional Title Here' +~ +

This is an example reference-style link.

+~ +This is [an example][id] reference-style link. +[id]: http://example.com/ (Optional Title Here) +~ +

This is an example reference-style link.

+~ +This is [an example][id] reference-style link. +[id]: (Optional Title Here) +~ +

This is an example reference-style link.

+~ +This is [an example][ID] reference-style link. +[id]: (Optional Title Here) +~ +

This is an example reference-style link.

+~ +Visit [Daring Fireball][] for more information. +[Daring Fireball]: http://daringfireball.net/ +~ +

Visit Daring Fireball for more information.

+~ +I get 10 times more traffic from [Google] [1] than from +[Yahoo] [2] or +[MSN] [3]. + + [1]: http://google.com/ "Google" + [2]: http://search.yahoo.com/ "Yahoo Search" + [3]: http://search.msn.com/ "MSN Search" +~ +

I get 10 times more traffic from Google than from +Yahoo or +MSN.

+~ +I get 10 times more traffic from [Google][] than from +[Yahoo][] or [MSN][]. + + [google]: http://google.com/ "Google" + [yahoo]: http://search.yahoo.com/ "Yahoo Search" + [msn]: http://search.msn.com/ "MSN Search" +~ +

I get 10 times more traffic from Google than from +Yahoo +or MSN.

+~ +I get 10 times more traffic from [Google](http://google.com/ "Google") +than from [Yahoo](http://search.yahoo.com/ "Yahoo Search") or +[MSN](http://search.msn.com/ "MSN Search"). +~ +

I get 10 times more traffic from Google than from +Yahoo +or MSN.

+]] + +-- Test emphasis +emphasis = [[ +Test *single asterisks* test. +~ +

Test single asterisks test.

+~ +Test _single underscore_ test. +~ +

Test single underscore test.

+~ +Test **double asterisks** test. +~ +

Test double asterisks test.

+~ +Test __double underscores__ test. +~ +

Test double underscores test.

+~ +un*fucking*believable +~ +

unfuckingbelievable

+~ +\*this text is surrounded by literal asterisks\* +~ +

*this text is surrounded by literal asterisks*

+~ +Test with *two* different *words*. +~ +

Test with two different words.

+]] + +-- Test code +code = [[ +Use the `printf()` function. +~ +

Use the printf() function.

+~ +``There is a literal backtick (`) here.`` +~ +

There is a literal backtick (`) here.

+~ +``There is a literal backtick (`) here.`` +~ +

There is a literal backtick (`) here.

+~ +Please don't use any `` tags. +~ +

Please don't use any <blink> tags.

+~ +`—` is the decimal-encoded equivalent of `—`. +~ +

&#8212; is the decimal-encoded +equivalent of &mdash;.

+~ +Markdown treats asterisks (`*`) and underscores (`_`) as +indicators of emphasis. Text wrapped with one `*` or `_` will be +wrapped with an +~ +

Markdown treats asterisks (*) and underscores +(_) as indicators of emphasis. Text wrapped with one +* or _ will be wrapped with an

+]] + +-- Test images +images = [[ +![Alt text](/path/to/img.jpg) + +![Alt text](/path/to/img.jpg "Optional title") +~ +

Alt text

+ +

Alt text

+~ +![Alt text][id] + +[id]: url/to/image "Optional title attribute" +~ +

Alt text

+]] + +-- Test autolinking +autolinks = [[ +An auto link . +~ +

An auto link http://www.google.com/.

+~ +Mail links and . +~ +

Mail links +niklas@frykholm +.se and +niklas@frykholm +.se.

+]] + +-- Test escape codes +escapes = [[ +\*literal asterisks\* +~ +

*literal asterisks*

+]] + +-- Test hard linebreaks +linebreaks = [[ +Poetry +in +motion +~ +

Poetry
+in
+motion

+]] + +-- Test amps and angles conversion +amps = [[ +© +~ +

©

+~ +AT&T +~ +

AT&T

+~ +4 < 5 +~ +

4 < 5

+]] + +-- Test raw blocks +raw = [[ +
+ This should not be disturbed by Markdown. +
+~ +
+ This should not be disturbed by Markdown. +
+]] + +-- Markdown test suit + +markdown_amps_and_angles = [[ +AT&T has an ampersand in their name. + +AT&T is another way to write it. + +This & that. + +4 < 5. + +6 > 5. + +Here's a [link] [1] with an ampersand in the URL. + +Here's a link with an amersand in the link text: [AT&T] [2]. + +Here's an inline [link](/script?foo=1&bar=2). + +Here's an inline [link](). + +[1]: http://example.com/?foo=1&bar=2 +[2]: http://att.com/ "AT&T" +~ +

AT&T has an ampersand in their name.

+ +

AT&T is another way to write it.

+ +

This & that.

+ +

4 < 5.

+ +

6 > 5.

+ +

Here's a link with an ampersand in the URL.

+ +

Here's a link with an amersand in the link text: AT&T.

+ +

Here's an inline link.

+ +

Here's an inline link.

+]] + +markdown_auto_links = [[ +Link: . + +With an ampersand: + +* In a list? +* +* It should. + +> Blockquoted: + +Auto-links should not occur here: `` + + or here: +~ +

Link: http://example.com/.

+ +

With an ampersand: http://example.com/?foo=1&bar=2

+ + + +
+

Blockquoted: http://example.com/

+
+ +

Auto-links should not occur here: <http://example.com/>

+ +
or here: <http://example.com/>
+
+]] + +markdown_backslash_escapes = [[ +These should all get escaped: + +Backslash: \\ + +Backtick: \` + +Asterisk: \* + +Underscore: \_ + +Left brace: \{ + +Right brace: \} + +Left bracket: \[ + +Right bracket: \] + +Left paren: \( + +Right paren: \) + +Greater-than: \> + +Hash: \# + +Period: \. + +Bang: \! + +Plus: \+ + +Minus: \- + + + +These should not, because they occur within a code block: + + Backslash: \\ + + Backtick: \` + + Asterisk: \* + + Underscore: \_ + + Left brace: \{ + + Right brace: \} + + Left bracket: \[ + + Right bracket: \] + + Left paren: \( + + Right paren: \) + + Greater-than: \> + + Hash: \# + + Period: \. + + Bang: \! + + Plus: \+ + + Minus: \- + + +Nor should these, which occur in code spans: + +Backslash: `\\` + +Backtick: `` \` `` + +Asterisk: `\*` + +Underscore: `\_` + +Left brace: `\{` + +Right brace: `\}` + +Left bracket: `\[` + +Right bracket: `\]` + +Left paren: `\(` + +Right paren: `\)` + +Greater-than: `\>` + +Hash: `\#` + +Period: `\.` + +Bang: `\!` + +Plus: `\+` + +Minus: `\-` +~ +

These should all get escaped:

+ +

Backslash: \

+ +

Backtick: `

+ +

Asterisk: *

+ +

Underscore: _

+ +

Left brace: {

+ +

Right brace: }

+ +

Left bracket: [

+ +

Right bracket: ]

+ +

Left paren: (

+ +

Right paren: )

+ +

Greater-than: >

+ +

Hash: #

+ +

Period: .

+ +

Bang: !

+ +

Plus: +

+ +

Minus: -

+ +

These should not, because they occur within a code block:

+ +
Backslash: \\
+
+Backtick: \`
+
+Asterisk: \*
+
+Underscore: \_
+
+Left brace: \{
+
+Right brace: \}
+
+Left bracket: \[
+
+Right bracket: \]
+
+Left paren: \(
+
+Right paren: \)
+
+Greater-than: \>
+
+Hash: \#
+
+Period: \.
+
+Bang: \!
+
+Plus: \+
+
+Minus: \-
+
+ +

Nor should these, which occur in code spans:

+ +

Backslash: \\

+ +

Backtick: \`

+ +

Asterisk: \*

+ +

Underscore: \_

+ +

Left brace: \{

+ +

Right brace: \}

+ +

Left bracket: \[

+ +

Right bracket: \]

+ +

Left paren: \(

+ +

Right paren: \)

+ +

Greater-than: \>

+ +

Hash: \#

+ +

Period: \.

+ +

Bang: \!

+ +

Plus: \+

+ +

Minus: \-

+]] + +markdown_blockquotes_with_codeblocks = [[ +> Example: +> +> sub status { +> print "working"; +> } +> +> Or: +> +> sub status { +> return "working"; +> } +~ +
+

Example:

+ +
sub status {
+    print "working";
+}
+
+ +

Or:

+ +
sub status {
+    return "working";
+}
+
+
+]] + +markdown_hard_wrapped_paragraphs_with_list_like_lines = [[ +In Markdown 1.0.0 and earlier. Version +8. This line turns into a list item. +Because a hard-wrapped line in the +middle of a paragraph looked like a +list item. + +Here's one with a bullet. +* criminey. +~ +

In Markdown 1.0.0 and earlier. Version +8. This line turns into a list item. +Because a hard-wrapped line in the +middle of a paragraph looked like a +list item.

+ +

Here's one with a bullet. +* criminey.

+]] + +markdown_horizontal_rules = [[ +Dashes: + +--- + + --- + + --- + + --- + + --- + +- - - + + - - - + + - - - + + - - - + + - - - + + +Asterisks: + +*** + + *** + + *** + + *** + + *** + +* * * + + * * * + + * * * + + * * * + + * * * + + +Underscores: + +___ + + ___ + + ___ + + ___ + + ___ + +_ _ _ + + _ _ _ + + _ _ _ + + _ _ _ + + _ _ _ +~ +

Dashes:

+ +
+ +
+ +
+ +
+ +
---
+
+ +
+ +
+ +
+ +
+ +
- - -
+
+ +

Asterisks:

+ +
+ +
+ +
+ +
+ +
***
+
+ +
+ +
+ +
+ +
+ +
* * *
+
+ +

Underscores:

+ +
+ +
+ +
+ +
+ +
___
+
+ +
+ +
+ +
+ +
+ +
_ _ _
+
+]] + +-- currently not supported by regular markdown, so we ignore it too +--[[ +markdown_inline_html_advanced = +Simple block on one line: + +
foo
+ +And nested without indentation: + +
+
+
+foo +
+
+
bar
+
+~ +

Simple block on one line:

+ +
foo
+ +

And nested without indentation:

+ +
+
+
+foo +
+
+
bar
+
+]] + +markdown_inline_html_simple = [[ +Here's a simple block: + +
+ foo +
+ +This should be a code block, though: + +
+ foo +
+ +As should this: + +
foo
+ +Now, nested: + +
+
+
+ foo +
+
+
+ +This should just be an HTML comment: + + + +Multiline: + + + +Code block: + + + +Just plain comment, with trailing spaces on the line: + + + +Code: + +
+ +Hr's: + +
+ +
+ +
+ +
+ +
+ +
+ +
+ +
+ +
+~ +

Here's a simple block:

+ +
+ foo +
+ +

This should be a code block, though:

+ +
<div>
+    foo
+</div>
+
+ +

As should this:

+ +
<div>foo</div>
+
+ +

Now, nested:

+ +
+
+
+ foo +
+
+
+ +

This should just be an HTML comment:

+ + + +

Multiline:

+ + + +

Code block:

+ +
<!-- Comment -->
+
+ +

Just plain comment, with trailing spaces on the line:

+ + + +

Code:

+ +
<hr />
+
+ +

Hr's:

+ +
+ +
+ +
+ +
+ +
+ +
+ +
+ +
+ +
+]] + +markdown_inline_html_comments = [[ +Paragraph one. + + + + + +Paragraph two. + + + +The end. +~ +

Paragraph one.

+ + + + + +

Paragraph two.

+ + + +

The end.

+]] + +markdown_links_inline_style = [[ +Just a [URL](/url/). + +[URL and title](/url/ "title"). + +[URL and title](/url/ "title preceded by two spaces"). + +[URL and title](/url/ "title preceded by a tab"). + +[Empty](). +~ +

Just a URL.

+ +

URL and title.

+ +

URL and title.

+ +

URL and title.

+ +

Empty.

+]] + +markdown_links_reference_style = [=[ +Foo [bar] [1]. + +Foo [bar][1]. + +Foo [bar] +[1]. + +[1]: /url/ "Title" + + +With [embedded [brackets]] [b]. + + +Indented [once][]. + +Indented [twice][]. + +Indented [thrice][]. + +Indented [four][] times. + + [once]: /url + + [twice]: /url + + [thrice]: /url + + [four]: /url + + +[b]: /url/ +~ +

Foo bar.

+ +

Foo bar.

+ +

Foo bar.

+ +

With embedded [brackets].

+ +

Indented once.

+ +

Indented twice.

+ +

Indented thrice.

+ +

Indented [four][] times.

+ +
[four]: /url
+
+]=] + +markdown_literal_quotes_in_text = [=[ +Foo [bar][]. + +Foo [bar](/url/ "Title with "quotes" inside"). + + + [bar]: /url/ "Title with "quotes" inside" +~ +

Foo bar.

+ +

Foo bar.

+]=] + +markdown_basics = [[ +Markdown: Basics +================ + + + + +Getting the Gist of Markdown's Formatting Syntax +------------------------------------------------ + +This page offers a brief overview of what it's like to use Markdown. +The [syntax page] [s] provides complete, detailed documentation for +every feature, but Markdown should be very easy to pick up simply by +looking at a few examples of it in action. The examples on this page +are written in a before/after style, showing example syntax and the +HTML output produced by Markdown. + +It's also helpful to simply try Markdown out; the [Dingus] [d] is a +web application that allows you type your own Markdown-formatted text +and translate it to XHTML. + +**Note:** This document is itself written using Markdown; you +can [see the source for it by adding '.text' to the URL] [src]. + + [s]: /projects/markdown/syntax "Markdown Syntax" + [d]: /projects/markdown/dingus "Markdown Dingus" + [src]: /projects/markdown/basics.text + + +## Paragraphs, Headers, Blockquotes ## + +A paragraph is simply one or more consecutive lines of text, separated +by one or more blank lines. (A blank line is any line that looks like a +blank line -- a line containing nothing spaces or tabs is considered +blank.) Normal paragraphs should not be intended with spaces or tabs. + +Markdown offers two styles of headers: *Setext* and *atx*. +Setext-style headers for `

` and `

` are created by +"underlining" with equal signs (`=`) and hyphens (`-`), respectively. +To create an atx-style header, you put 1-6 hash marks (`#`) at the +beginning of the line -- the number of hashes equals the resulting +HTML header level. + +Blockquotes are indicated using email-style '`>`' angle brackets. + +Markdown: + + A First Level Header + ==================== + + A Second Level Header + --------------------- + + Now is the time for all good men to come to + the aid of their country. This is just a + regular paragraph. + + The quick brown fox jumped over the lazy + dog's back. + + ### Header 3 + + > This is a blockquote. + > + > This is the second paragraph in the blockquote. + > + > ## This is an H2 in a blockquote + + +Output: + +

A First Level Header

+ +

A Second Level Header

+ +

Now is the time for all good men to come to + the aid of their country. This is just a + regular paragraph.

+ +

The quick brown fox jumped over the lazy + dog's back.

+ +

Header 3

+ +
+

This is a blockquote.

+ +

This is the second paragraph in the blockquote.

+ +

This is an H2 in a blockquote

+
+ + + +### Phrase Emphasis ### + +Markdown uses asterisks and underscores to indicate spans of emphasis. + +Markdown: + + Some of these words *are emphasized*. + Some of these words _are emphasized also_. + + Use two asterisks for **strong emphasis**. + Or, if you prefer, __use two underscores instead__. + +Output: + +

Some of these words are emphasized. + Some of these words are emphasized also.

+ +

Use two asterisks for strong emphasis. + Or, if you prefer, use two underscores instead.

+ + + +## Lists ## + +Unordered (bulleted) lists use asterisks, pluses, and hyphens (`*`, +`+`, and `-`) as list markers. These three markers are +interchangable; this: + + * Candy. + * Gum. + * Booze. + +this: + + + Candy. + + Gum. + + Booze. + +and this: + + - Candy. + - Gum. + - Booze. + +all produce the same output: + +
    +
  • Candy.
  • +
  • Gum.
  • +
  • Booze.
  • +
+ +Ordered (numbered) lists use regular numbers, followed by periods, as +list markers: + + 1. Red + 2. Green + 3. Blue + +Output: + +
    +
  1. Red
  2. +
  3. Green
  4. +
  5. Blue
  6. +
+ +If you put blank lines between items, you'll get `

` tags for the +list item text. You can create multi-paragraph list items by indenting +the paragraphs by 4 spaces or 1 tab: + + * A list item. + + With multiple paragraphs. + + * Another item in the list. + +Output: + +

    +
  • A list item.

    +

    With multiple paragraphs.

  • +
  • Another item in the list.

  • +
+ + + +### Links ### + +Markdown supports two styles for creating links: *inline* and +*reference*. With both styles, you use square brackets to delimit the +text you want to turn into a link. + +Inline-style links use parentheses immediately after the link text. +For example: + + This is an [example link](http://example.com/). + +Output: + +

This is an + example link.

+ +Optionally, you may include a title attribute in the parentheses: + + This is an [example link](http://example.com/ "With a Title"). + +Output: + +

This is an + example link.

+ +Reference-style links allow you to refer to your links by names, which +you define elsewhere in your document: + + I get 10 times more traffic from [Google][1] than from + [Yahoo][2] or [MSN][3]. + + [1]: http://google.com/ "Google" + [2]: http://search.yahoo.com/ "Yahoo Search" + [3]: http://search.msn.com/ "MSN Search" + +Output: + +

I get 10 times more traffic from Google than from Yahoo or MSN.

+ +The title attribute is optional. Link names may contain letters, +numbers and spaces, but are *not* case sensitive: + + I start my morning with a cup of coffee and + [The New York Times][NY Times]. + + [ny times]: http://www.nytimes.com/ + +Output: + +

I start my morning with a cup of coffee and + The New York Times.

+ + +### Images ### + +Image syntax is very much like link syntax. + +Inline (titles are optional): + + ![alt text](/path/to/img.jpg "Title") + +Reference-style: + + ![alt text][id] + + [id]: /path/to/img.jpg "Title" + +Both of the above examples produce the same output: + + alt text + + + +### Code ### + +In a regular paragraph, you can create code span by wrapping text in +backtick quotes. Any ampersands (`&`) and angle brackets (`<` or +`>`) will automatically be translated into HTML entities. This makes +it easy to use Markdown to write about HTML example code: + + I strongly recommend against using any `` tags. + + I wish SmartyPants used named entities like `—` + instead of decimal-encoded entites like `—`. + +Output: + +

I strongly recommend against using any + <blink> tags.

+ +

I wish SmartyPants used named entities like + &mdash; instead of decimal-encoded + entites like &#8212;.

+ + +To specify an entire block of pre-formatted code, indent every line of +the block by 4 spaces or 1 tab. Just like with code spans, `&`, `<`, +and `>` characters will be escaped automatically. + +Markdown: + + If you want your page to validate under XHTML 1.0 Strict, + you've got to put paragraph tags in your blockquotes: + +
+

For example.

+
+ +Output: + +

If you want your page to validate under XHTML 1.0 Strict, + you've got to put paragraph tags in your blockquotes:

+ +
<blockquote>
+        <p>For example.</p>
+    </blockquote>
+    
+~ +

Markdown: Basics

+ + + +

Getting the Gist of Markdown's Formatting Syntax

+ +

This page offers a brief overview of what it's like to use Markdown. +The syntax page provides complete, detailed documentation for +every feature, but Markdown should be very easy to pick up simply by +looking at a few examples of it in action. The examples on this page +are written in a before/after style, showing example syntax and the +HTML output produced by Markdown.

+ +

It's also helpful to simply try Markdown out; the Dingus is a +web application that allows you type your own Markdown-formatted text +and translate it to XHTML.

+ +

Note: This document is itself written using Markdown; you +can see the source for it by adding '.text' to the URL.

+ +

Paragraphs, Headers, Blockquotes

+ +

A paragraph is simply one or more consecutive lines of text, separated +by one or more blank lines. (A blank line is any line that looks like a +blank line -- a line containing nothing spaces or tabs is considered +blank.) Normal paragraphs should not be intended with spaces or tabs.

+ +

Markdown offers two styles of headers: Setext and atx. +Setext-style headers for <h1> and <h2> are created by +"underlining" with equal signs (=) and hyphens (-), respectively. +To create an atx-style header, you put 1-6 hash marks (#) at the +beginning of the line -- the number of hashes equals the resulting +HTML header level.

+ +

Blockquotes are indicated using email-style '>' angle brackets.

+ +

Markdown:

+ +
A First Level Header
+====================
+
+A Second Level Header
+---------------------
+
+Now is the time for all good men to come to
+the aid of their country. This is just a
+regular paragraph.
+
+The quick brown fox jumped over the lazy
+dog's back.
+
+### Header 3
+
+> This is a blockquote.
+> 
+> This is the second paragraph in the blockquote.
+>
+> ## This is an H2 in a blockquote
+
+ +

Output:

+ +
<h1>A First Level Header</h1>
+
+<h2>A Second Level Header</h2>
+
+<p>Now is the time for all good men to come to
+the aid of their country. This is just a
+regular paragraph.</p>
+
+<p>The quick brown fox jumped over the lazy
+dog's back.</p>
+
+<h3>Header 3</h3>
+
+<blockquote>
+    <p>This is a blockquote.</p>
+
+    <p>This is the second paragraph in the blockquote.</p>
+
+    <h2>This is an H2 in a blockquote</h2>
+</blockquote>
+
+ +

Phrase Emphasis

+ +

Markdown uses asterisks and underscores to indicate spans of emphasis.

+ +

Markdown:

+ +
Some of these words *are emphasized*.
+Some of these words _are emphasized also_.
+
+Use two asterisks for **strong emphasis**.
+Or, if you prefer, __use two underscores instead__.
+
+ +

Output:

+ +
<p>Some of these words <em>are emphasized</em>.
+Some of these words <em>are emphasized also</em>.</p>
+
+<p>Use two asterisks for <strong>strong emphasis</strong>.
+Or, if you prefer, <strong>use two underscores instead</strong>.</p>
+
+ +

Lists

+ +

Unordered (bulleted) lists use asterisks, pluses, and hyphens (*, ++, and -) as list markers. These three markers are +interchangable; this:

+ +
*   Candy.
+*   Gum.
+*   Booze.
+
+ +

this:

+ +
+   Candy.
++   Gum.
++   Booze.
+
+ +

and this:

+ +
-   Candy.
+-   Gum.
+-   Booze.
+
+ +

all produce the same output:

+ +
<ul>
+<li>Candy.</li>
+<li>Gum.</li>
+<li>Booze.</li>
+</ul>
+
+ +

Ordered (numbered) lists use regular numbers, followed by periods, as +list markers:

+ +
1.  Red
+2.  Green
+3.  Blue
+
+ +

Output:

+ +
<ol>
+<li>Red</li>
+<li>Green</li>
+<li>Blue</li>
+</ol>
+
+ +

If you put blank lines between items, you'll get <p> tags for the +list item text. You can create multi-paragraph list items by indenting +the paragraphs by 4 spaces or 1 tab:

+ +
*   A list item.
+
+    With multiple paragraphs.
+
+*   Another item in the list.
+
+ +

Output:

+ +
<ul>
+<li><p>A list item.</p>
+<p>With multiple paragraphs.</p></li>
+<li><p>Another item in the list.</p></li>
+</ul>
+
+ +

Links

+ +

Markdown supports two styles for creating links: inline and +reference. With both styles, you use square brackets to delimit the +text you want to turn into a link.

+ +

Inline-style links use parentheses immediately after the link text. +For example:

+ +
This is an [example link](http://example.com/).
+
+ +

Output:

+ +
<p>This is an <a href="http://example.com/">
+example link</a>.</p>
+
+ +

Optionally, you may include a title attribute in the parentheses:

+ +
This is an [example link](http://example.com/ "With a Title").
+
+ +

Output:

+ +
<p>This is an <a href="http://example.com/" title="With a Title">
+example link</a>.</p>
+
+ +

Reference-style links allow you to refer to your links by names, which +you define elsewhere in your document:

+ +
I get 10 times more traffic from [Google][1] than from
+[Yahoo][2] or [MSN][3].
+
+[1]: http://google.com/        "Google"
+[2]: http://search.yahoo.com/  "Yahoo Search"
+[3]: http://search.msn.com/    "MSN Search"
+
+ +

Output:

+ +
<p>I get 10 times more traffic from <a href="http://google.com/"
+title="Google">Google</a> than from <a href="http://search.yahoo.com/"
+title="Yahoo Search">Yahoo</a> or <a href="http://search.msn.com/"
+title="MSN Search">MSN</a>.</p>
+
+ +

The title attribute is optional. Link names may contain letters, +numbers and spaces, but are not case sensitive:

+ +
I start my morning with a cup of coffee and
+[The New York Times][NY Times].
+
+[ny times]: http://www.nytimes.com/
+
+ +

Output:

+ +
<p>I start my morning with a cup of coffee and
+<a href="http://www.nytimes.com/">The New York Times</a>.</p>
+
+ +

Images

+ +

Image syntax is very much like link syntax.

+ +

Inline (titles are optional):

+ +
![alt text](/path/to/img.jpg "Title")
+
+ +

Reference-style:

+ +
![alt text][id]
+
+[id]: /path/to/img.jpg "Title"
+
+ +

Both of the above examples produce the same output:

+ +
<img src="/path/to/img.jpg" alt="alt text" title="Title" />
+
+ +

Code

+ +

In a regular paragraph, you can create code span by wrapping text in +backtick quotes. Any ampersands (&) and angle brackets (< or +>) will automatically be translated into HTML entities. This makes +it easy to use Markdown to write about HTML example code:

+ +
I strongly recommend against using any `<blink>` tags.
+
+I wish SmartyPants used named entities like `&mdash;`
+instead of decimal-encoded entites like `&#8212;`.
+
+ +

Output:

+ +
<p>I strongly recommend against using any
+<code>&lt;blink&gt;</code> tags.</p>
+
+<p>I wish SmartyPants used named entities like
+<code>&amp;mdash;</code> instead of decimal-encoded
+entites like <code>&amp;#8212;</code>.</p>
+
+ +

To specify an entire block of pre-formatted code, indent every line of +the block by 4 spaces or 1 tab. Just like with code spans, &, <, +and > characters will be escaped automatically.

+ +

Markdown:

+ +
If you want your page to validate under XHTML 1.0 Strict,
+you've got to put paragraph tags in your blockquotes:
+
+    <blockquote>
+        <p>For example.</p>
+    </blockquote>
+
+ +

Output:

+ +
<p>If you want your page to validate under XHTML 1.0 Strict,
+you've got to put paragraph tags in your blockquotes:</p>
+
+<pre><code>&lt;blockquote&gt;
+    &lt;p&gt;For example.&lt;/p&gt;
+&lt;/blockquote&gt;
+</code></pre>
+
+]] + +markdown_syntax = [[ +Markdown: Syntax +================ + + + + +* [Overview](#overview) + * [Philosophy](#philosophy) + * [Inline HTML](#html) + * [Automatic Escaping for Special Characters](#autoescape) +* [Block Elements](#block) + * [Paragraphs and Line Breaks](#p) + * [Headers](#header) + * [Blockquotes](#blockquote) + * [Lists](#list) + * [Code Blocks](#precode) + * [Horizontal Rules](#hr) +* [Span Elements](#span) + * [Links](#link) + * [Emphasis](#em) + * [Code](#code) + * [Images](#img) +* [Miscellaneous](#misc) + * [Backslash Escapes](#backslash) + * [Automatic Links](#autolink) + + +**Note:** This document is itself written using Markdown; you +can [see the source for it by adding '.text' to the URL][src]. + + [src]: /projects/markdown/syntax.text + +* * * + +

Overview

+ +

Philosophy

+ +Markdown is intended to be as easy-to-read and easy-to-write as is feasible. + +Readability, however, is emphasized above all else. A Markdown-formatted +document should be publishable as-is, as plain text, without looking +like it's been marked up with tags or formatting instructions. While +Markdown's syntax has been influenced by several existing text-to-HTML +filters -- including [Setext] [1], [atx] [2], [Textile] [3], [reStructuredText] [4], +[Grutatext] [5], and [EtText] [6] -- the single biggest source of +inspiration for Markdown's syntax is the format of plain text email. + + [1]: http://docutils.sourceforge.net/mirror/setext.html + [2]: http://www.aaronsw.com/2002/atx/ + [3]: http://textism.com/tools/textile/ + [4]: http://docutils.sourceforge.net/rst.html + [5]: http://www.triptico.com/software/grutatxt.html + [6]: http://ettext.taint.org/doc/ + +To this end, Markdown's syntax is comprised entirely of punctuation +characters, which punctuation characters have been carefully chosen so +as to look like what they mean. E.g., asterisks around a word actually +look like \*emphasis\*. Markdown lists look like, well, lists. Even +blockquotes look like quoted passages of text, assuming you've ever +used email. + + + +

Inline HTML

+ +Markdown's syntax is intended for one purpose: to be used as a +format for *writing* for the web. + +Markdown is not a replacement for HTML, or even close to it. Its +syntax is very small, corresponding only to a very small subset of +HTML tags. The idea is *not* to create a syntax that makes it easier +to insert HTML tags. In my opinion, HTML tags are already easy to +insert. The idea for Markdown is to make it easy to read, write, and +edit prose. HTML is a *publishing* format; Markdown is a *writing* +format. Thus, Markdown's formatting syntax only addresses issues that +can be conveyed in plain text. + +For any markup that is not covered by Markdown's syntax, you simply +use HTML itself. There's no need to preface it or delimit it to +indicate that you're switching from Markdown to HTML; you just use +the tags. + +The only restrictions are that block-level HTML elements -- e.g. `
`, +``, `
`, `

`, etc. -- must be separated from surrounding +content by blank lines, and the start and end tags of the block should +not be indented with tabs or spaces. Markdown is smart enough not +to add extra (unwanted) `

` tags around HTML block-level tags. + +For example, to add an HTML table to a Markdown article: + + This is a regular paragraph. + +

+ + + +
Foo
+ + This is another regular paragraph. + +Note that Markdown formatting syntax is not processed within block-level +HTML tags. E.g., you can't use Markdown-style `*emphasis*` inside an +HTML block. + +Span-level HTML tags -- e.g. ``, ``, or `` -- can be +used anywhere in a Markdown paragraph, list item, or header. If you +want, you can even use HTML tags instead of Markdown formatting; e.g. if +you'd prefer to use HTML `` or `` tags instead of Markdown's +link or image syntax, go right ahead. + +Unlike block-level HTML tags, Markdown syntax *is* processed within +span-level tags. + + +

Automatic Escaping for Special Characters

+ +In HTML, there are two characters that demand special treatment: `<` +and `&`. Left angle brackets are used to start tags; ampersands are +used to denote HTML entities. If you want to use them as literal +characters, you must escape them as entities, e.g. `<`, and +`&`. + +Ampersands in particular are bedeviling for web writers. If you want to +write about 'AT&T', you need to write '`AT&T`'. You even need to +escape ampersands within URLs. Thus, if you want to link to: + + http://images.google.com/images?num=30&q=larry+bird + +you need to encode the URL as: + + http://images.google.com/images?num=30&q=larry+bird + +in your anchor tag `href` attribute. Needless to say, this is easy to +forget, and is probably the single most common source of HTML validation +errors in otherwise well-marked-up web sites. + +Markdown allows you to use these characters naturally, taking care of +all the necessary escaping for you. If you use an ampersand as part of +an HTML entity, it remains unchanged; otherwise it will be translated +into `&`. + +So, if you want to include a copyright symbol in your article, you can write: + + © + +and Markdown will leave it alone. But if you write: + + AT&T + +Markdown will translate it to: + + AT&T + +Similarly, because Markdown supports [inline HTML](#html), if you use +angle brackets as delimiters for HTML tags, Markdown will treat them as +such. But if you write: + + 4 < 5 + +Markdown will translate it to: + + 4 < 5 + +However, inside Markdown code spans and blocks, angle brackets and +ampersands are *always* encoded automatically. This makes it easy to use +Markdown to write about HTML code. (As opposed to raw HTML, which is a +terrible format for writing about HTML syntax, because every single `<` +and `&` in your example code needs to be escaped.) + + +* * * + + +

Block Elements

+ + +

Paragraphs and Line Breaks

+ +A paragraph is simply one or more consecutive lines of text, separated +by one or more blank lines. (A blank line is any line that looks like a +blank line -- a line containing nothing but spaces or tabs is considered +blank.) Normal paragraphs should not be intended with spaces or tabs. + +The implication of the "one or more consecutive lines of text" rule is +that Markdown supports "hard-wrapped" text paragraphs. This differs +significantly from most other text-to-HTML formatters (including Movable +Type's "Convert Line Breaks" option) which translate every line break +character in a paragraph into a `
` tag. + +When you *do* want to insert a `
` break tag using Markdown, you +end a line with two or more spaces, then type return. + +Yes, this takes a tad more effort to create a `
`, but a simplistic +"every line break is a `
`" rule wouldn't work for Markdown. +Markdown's email-style [blockquoting][bq] and multi-paragraph [list items][l] +work best -- and look better -- when you format them with hard breaks. + + [bq]: #blockquote + [l]: #list + + + + + +Markdown supports two styles of headers, [Setext] [1] and [atx] [2]. + +Setext-style headers are "underlined" using equal signs (for first-level +headers) and dashes (for second-level headers). For example: + + This is an H1 + ============= + + This is an H2 + ------------- + +Any number of underlining `=`'s or `-`'s will work. + +Atx-style headers use 1-6 hash characters at the start of the line, +corresponding to header levels 1-6. For example: + + # This is an H1 + + ## This is an H2 + + ###### This is an H6 + +Optionally, you may "close" atx-style headers. This is purely +cosmetic -- you can use this if you think it looks better. The +closing hashes don't even need to match the number of hashes +used to open the header. (The number of opening hashes +determines the header level.) : + + # This is an H1 # + + ## This is an H2 ## + + ### This is an H3 ###### + + +

Blockquotes

+ +Markdown uses email-style `>` characters for blockquoting. If you're +familiar with quoting passages of text in an email message, then you +know how to create a blockquote in Markdown. It looks best if you hard +wrap the text and put a `>` before every line: + + > This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, + > consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus. + > Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus. + > + > Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse + > id sem consectetuer libero luctus adipiscing. + +Markdown allows you to be lazy and only put the `>` before the first +line of a hard-wrapped paragraph: + + > This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, + consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus. + Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus. + + > Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse + id sem consectetuer libero luctus adipiscing. + +Blockquotes can be nested (i.e. a blockquote-in-a-blockquote) by +adding additional levels of `>`: + + > This is the first level of quoting. + > + > > This is nested blockquote. + > + > Back to the first level. + +Blockquotes can contain other Markdown elements, including headers, lists, +and code blocks: + + > ## This is a header. + > + > 1. This is the first list item. + > 2. This is the second list item. + > + > Here's some example code: + > + > return shell_exec("echo $input | $markdown_script"); + +Any decent text editor should make email-style quoting easy. For +example, with BBEdit, you can make a selection and choose Increase +Quote Level from the Text menu. + + +

Lists

+ +Markdown supports ordered (numbered) and unordered (bulleted) lists. + +Unordered lists use asterisks, pluses, and hyphens -- interchangably +-- as list markers: + + * Red + * Green + * Blue + +is equivalent to: + + + Red + + Green + + Blue + +and: + + - Red + - Green + - Blue + +Ordered lists use numbers followed by periods: + + 1. Bird + 2. McHale + 3. Parish + +It's important to note that the actual numbers you use to mark the +list have no effect on the HTML output Markdown produces. The HTML +Markdown produces from the above list is: + +
    +
  1. Bird
  2. +
  3. McHale
  4. +
  5. Parish
  6. +
+ +If you instead wrote the list in Markdown like this: + + 1. Bird + 1. McHale + 1. Parish + +or even: + + 3. Bird + 1. McHale + 8. Parish + +you'd get the exact same HTML output. The point is, if you want to, +you can use ordinal numbers in your ordered Markdown lists, so that +the numbers in your source match the numbers in your published HTML. +But if you want to be lazy, you don't have to. + +If you do use lazy list numbering, however, you should still start the +list with the number 1. At some point in the future, Markdown may support +starting ordered lists at an arbitrary number. + +List markers typically start at the left margin, but may be indented by +up to three spaces. List markers must be followed by one or more spaces +or a tab. + +To make lists look nice, you can wrap items with hanging indents: + + * Lorem ipsum dolor sit amet, consectetuer adipiscing elit. + Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi, + viverra nec, fringilla in, laoreet vitae, risus. + * Donec sit amet nisl. Aliquam semper ipsum sit amet velit. + Suspendisse id sem consectetuer libero luctus adipiscing. + +But if you want to be lazy, you don't have to: + + * Lorem ipsum dolor sit amet, consectetuer adipiscing elit. + Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi, + viverra nec, fringilla in, laoreet vitae, risus. + * Donec sit amet nisl. Aliquam semper ipsum sit amet velit. + Suspendisse id sem consectetuer libero luctus adipiscing. + +If list items are separated by blank lines, Markdown will wrap the +items in `

` tags in the HTML output. For example, this input: + + * Bird + * Magic + +will turn into: + +

    +
  • Bird
  • +
  • Magic
  • +
+ +But this: + + * Bird + + * Magic + +will turn into: + +
    +
  • Bird

  • +
  • Magic

  • +
+ +List items may consist of multiple paragraphs. Each subsequent +paragraph in a list item must be intended by either 4 spaces +or one tab: + + 1. This is a list item with two paragraphs. Lorem ipsum dolor + sit amet, consectetuer adipiscing elit. Aliquam hendrerit + mi posuere lectus. + + Vestibulum enim wisi, viverra nec, fringilla in, laoreet + vitae, risus. Donec sit amet nisl. Aliquam semper ipsum + sit amet velit. + + 2. Suspendisse id sem consectetuer libero luctus adipiscing. + +It looks nice if you indent every line of the subsequent +paragraphs, but here again, Markdown will allow you to be +lazy: + + * This is a list item with two paragraphs. + + This is the second paragraph in the list item. You're + only required to indent the first line. Lorem ipsum dolor + sit amet, consectetuer adipiscing elit. + + * Another item in the same list. + +To put a blockquote within a list item, the blockquote's `>` +delimiters need to be indented: + + * A list item with a blockquote: + + > This is a blockquote + > inside a list item. + +To put a code block within a list item, the code block needs +to be indented *twice* -- 8 spaces or two tabs: + + * A list item with a code block: + + + + +It's worth noting that it's possible to trigger an ordered list by +accident, by writing something like this: + + 1986. What a great season. + +In other words, a *number-period-space* sequence at the beginning of a +line. To avoid this, you can backslash-escape the period: + + 1986\. What a great season. + + + +

Code Blocks

+ +Pre-formatted code blocks are used for writing about programming or +markup source code. Rather than forming normal paragraphs, the lines +of a code block are interpreted literally. Markdown wraps a code block +in both `
` and `` tags.
+
+To produce a code block in Markdown, simply indent every line of the
+block by at least 4 spaces or 1 tab. For example, given this input:
+
+    This is a normal paragraph:
+
+        This is a code block.
+
+Markdown will generate:
+
+    

This is a normal paragraph:

+ +
This is a code block.
+    
+ +One level of indentation -- 4 spaces or 1 tab -- is removed from each +line of the code block. For example, this: + + Here is an example of AppleScript: + + tell application "Foo" + beep + end tell + +will turn into: + +

Here is an example of AppleScript:

+ +
tell application "Foo"
+        beep
+    end tell
+    
+ +A code block continues until it reaches a line that is not indented +(or the end of the article). + +Within a code block, ampersands (`&`) and angle brackets (`<` and `>`) +are automatically converted into HTML entities. This makes it very +easy to include example HTML source code using Markdown -- just paste +it and indent it, and Markdown will handle the hassle of encoding the +ampersands and angle brackets. For example, this: + + + +will turn into: + +
<div class="footer">
+        &copy; 2004 Foo Corporation
+    </div>
+    
+ +Regular Markdown syntax is not processed within code blocks. E.g., +asterisks are just literal asterisks within a code block. This means +it's also easy to use Markdown to write about Markdown's own syntax. + + + +

Horizontal Rules

+ +You can produce a horizontal rule tag (`
`) by placing three or +more hyphens, asterisks, or underscores on a line by themselves. If you +wish, you may use spaces between the hyphens or asterisks. Each of the +following lines will produce a horizontal rule: + + * * * + + *** + + ***** + + - - - + + --------------------------------------- + + _ _ _ + + +* * * + +

Span Elements

+ + + +Markdown supports two style of links: *inline* and *reference*. + +In both styles, the link text is delimited by [square brackets]. + +To create an inline link, use a set of regular parentheses immediately +after the link text's closing square bracket. Inside the parentheses, +put the URL where you want the link to point, along with an *optional* +title for the link, surrounded in quotes. For example: + + This is [an example](http://example.com/ "Title") inline link. + + [This link](http://example.net/) has no title attribute. + +Will produce: + +

This is + an example inline link.

+ +

This link has no + title attribute.

+ +If you're referring to a local resource on the same server, you can +use relative paths: + + See my [About](/about/) page for details. + +Reference-style links use a second set of square brackets, inside +which you place a label of your choosing to identify the link: + + This is [an example][id] reference-style link. + +You can optionally use a space to separate the sets of brackets: + + This is [an example] [id] reference-style link. + +Then, anywhere in the document, you define your link label like this, +on a line by itself: + + [id]: http://example.com/ "Optional Title Here" + +That is: + +* Square brackets containing the link identifier (optionally + indented from the left margin using up to three spaces); +* followed by a colon; +* followed by one or more spaces (or tabs); +* followed by the URL for the link; +* optionally followed by a title attribute for the link, enclosed + in double or single quotes. + +The link URL may, optionally, be surrounded by angle brackets: + + [id]: "Optional Title Here" + +You can put the title attribute on the next line and use extra spaces +or tabs for padding, which tends to look better with longer URLs: + + [id]: http://example.com/longish/path/to/resource/here + "Optional Title Here" + +Link definitions are only used for creating links during Markdown +processing, and are stripped from your document in the HTML output. + +Link definition names may constist of letters, numbers, spaces, and punctuation -- but they are *not* case sensitive. E.g. these two links: + + [link text][a] + [link text][A] + +are equivalent. + +The *implicit link name* shortcut allows you to omit the name of the +link, in which case the link text itself is used as the name. +Just use an empty set of square brackets -- e.g., to link the word +"Google" to the google.com web site, you could simply write: + + [Google][] + +And then define the link: + + [Google]: http://google.com/ + +Because link names may contain spaces, this shortcut even works for +multiple words in the link text: + + Visit [Daring Fireball][] for more information. + +And then define the link: + + [Daring Fireball]: http://daringfireball.net/ + +Link definitions can be placed anywhere in your Markdown document. I +tend to put them immediately after each paragraph in which they're +used, but if you want, you can put them all at the end of your +document, sort of like footnotes. + +Here's an example of reference links in action: + + I get 10 times more traffic from [Google] [1] than from + [Yahoo] [2] or [MSN] [3]. + + [1]: http://google.com/ "Google" + [2]: http://search.yahoo.com/ "Yahoo Search" + [3]: http://search.msn.com/ "MSN Search" + +Using the implicit link name shortcut, you could instead write: + + I get 10 times more traffic from [Google][] than from + [Yahoo][] or [MSN][]. + + [google]: http://google.com/ "Google" + [yahoo]: http://search.yahoo.com/ "Yahoo Search" + [msn]: http://search.msn.com/ "MSN Search" + +Both of the above examples will produce the following HTML output: + +

I get 10 times more traffic from Google than from + Yahoo + or MSN.

+ +For comparison, here is the same paragraph written using +Markdown's inline link style: + + I get 10 times more traffic from [Google](http://google.com/ "Google") + than from [Yahoo](http://search.yahoo.com/ "Yahoo Search") or + [MSN](http://search.msn.com/ "MSN Search"). + +The point of reference-style links is not that they're easier to +write. The point is that with reference-style links, your document +source is vastly more readable. Compare the above examples: using +reference-style links, the paragraph itself is only 81 characters +long; with inline-style links, it's 176 characters; and as raw HTML, +it's 234 characters. In the raw HTML, there's more markup than there +is text. + +With Markdown's reference-style links, a source document much more +closely resembles the final output, as rendered in a browser. By +allowing you to move the markup-related metadata out of the paragraph, +you can add links without interrupting the narrative flow of your +prose. + + +

Emphasis

+ +Markdown treats asterisks (`*`) and underscores (`_`) as indicators of +emphasis. Text wrapped with one `*` or `_` will be wrapped with an +HTML `` tag; double `*`'s or `_`'s will be wrapped with an HTML +`` tag. E.g., this input: + + *single asterisks* + + _single underscores_ + + **double asterisks** + + __double underscores__ + +will produce: + + single asterisks + + single underscores + + double asterisks + + double underscores + +You can use whichever style you prefer; the lone restriction is that +the same character must be used to open and close an emphasis span. + +Emphasis can be used in the middle of a word: + + un*fucking*believable + +But if you surround an `*` or `_` with spaces, it'll be treated as a +literal asterisk or underscore. + +To produce a literal asterisk or underscore at a position where it +would otherwise be used as an emphasis delimiter, you can backslash +escape it: + + \*this text is surrounded by literal asterisks\* + + + +

Code

+ +To indicate a span of code, wrap it with backtick quotes (`` ` ``). +Unlike a pre-formatted code block, a code span indicates code within a +normal paragraph. For example: + + Use the `printf()` function. + +will produce: + +

Use the printf() function.

+ +To include a literal backtick character within a code span, you can use +multiple backticks as the opening and closing delimiters: + + ``There is a literal backtick (`) here.`` + +which will produce this: + +

There is a literal backtick (`) here.

+ +The backtick delimiters surrounding a code span may include spaces -- +one after the opening, one before the closing. This allows you to place +literal backtick characters at the beginning or end of a code span: + + A single backtick in a code span: `` ` `` + + A backtick-delimited string in a code span: `` `foo` `` + +will produce: + +

A single backtick in a code span: `

+ +

A backtick-delimited string in a code span: `foo`

+ +With a code span, ampersands and angle brackets are encoded as HTML +entities automatically, which makes it easy to include example HTML +tags. Markdown will turn this: + + Please don't use any `` tags. + +into: + +

Please don't use any <blink> tags.

+ +You can write this: + + `—` is the decimal-encoded equivalent of `—`. + +to produce: + +

&#8212; is the decimal-encoded + equivalent of &mdash;.

+ + + +

Images

+ +Admittedly, it's fairly difficult to devise a "natural" syntax for +placing images into a plain text document format. + +Markdown uses an image syntax that is intended to resemble the syntax +for links, allowing for two styles: *inline* and *reference*. + +Inline image syntax looks like this: + + ![Alt text](/path/to/img.jpg) + + ![Alt text](/path/to/img.jpg "Optional title") + +That is: + +* An exclamation mark: `!`; +* followed by a set of square brackets, containing the `alt` + attribute text for the image; +* followed by a set of parentheses, containing the URL or path to + the image, and an optional `title` attribute enclosed in double + or single quotes. + +Reference-style image syntax looks like this: + + ![Alt text][id] + +Where "id" is the name of a defined image reference. Image references +are defined using syntax identical to link references: + + [id]: url/to/image "Optional title attribute" + +As of this writing, Markdown has no syntax for specifying the +dimensions of an image; if this is important to you, you can simply +use regular HTML `` tags. + + +* * * + + +

Miscellaneous

+ + + +Markdown supports a shortcut style for creating "automatic" links for URLs and email addresses: simply surround the URL or email address with angle brackets. What this means is that if you want to show the actual text of a URL or email address, and also have it be a clickable link, you can do this: + + + +Markdown will turn this into: + + http://example.com/ + +Automatic links for email addresses work similarly, except that +Markdown will also perform a bit of randomized decimal and hex +entity-encoding to help obscure your address from address-harvesting +spambots. For example, Markdown will turn this: + + + +into something like this: + + address@exa + mple.com + +which will render in a browser as a clickable link to "address@example.com". + +(This sort of entity-encoding trick will indeed fool many, if not +most, address-harvesting bots, but it definitely won't fool all of +them. It's better than nothing, but an address published in this way +will probably eventually start receiving spam.) + + + +

Backslash Escapes

+ +Markdown allows you to use backslash escapes to generate literal +characters which would otherwise have special meaning in Markdown's +formatting syntax. For example, if you wanted to surround a word with +literal asterisks (instead of an HTML `` tag), you can backslashes +before the asterisks, like this: + + \*literal asterisks\* + +Markdown provides backslash escapes for the following characters: + + \ backslash + ` backtick + * asterisk + _ underscore + {} curly braces + [] square brackets + () parentheses + # hash mark + + plus sign + - minus sign (hyphen) + . dot + ! exclamation mark +~ +

Markdown: Syntax

+ + + + + +

Note: This document is itself written using Markdown; you +can see the source for it by adding '.text' to the URL.

+ +
+ +

Overview

+ +

Philosophy

+ +

Markdown is intended to be as easy-to-read and easy-to-write as is feasible.

+ +

Readability, however, is emphasized above all else. A Markdown-formatted +document should be publishable as-is, as plain text, without looking +like it's been marked up with tags or formatting instructions. While +Markdown's syntax has been influenced by several existing text-to-HTML +filters -- including Setext, atx, Textile, reStructuredText, +Grutatext, and EtText -- the single biggest source of +inspiration for Markdown's syntax is the format of plain text email.

+ +

To this end, Markdown's syntax is comprised entirely of punctuation +characters, which punctuation characters have been carefully chosen so +as to look like what they mean. E.g., asterisks around a word actually +look like *emphasis*. Markdown lists look like, well, lists. Even +blockquotes look like quoted passages of text, assuming you've ever +used email.

+ +

Inline HTML

+ +

Markdown's syntax is intended for one purpose: to be used as a +format for writing for the web.

+ +

Markdown is not a replacement for HTML, or even close to it. Its +syntax is very small, corresponding only to a very small subset of +HTML tags. The idea is not to create a syntax that makes it easier +to insert HTML tags. In my opinion, HTML tags are already easy to +insert. The idea for Markdown is to make it easy to read, write, and +edit prose. HTML is a publishing format; Markdown is a writing +format. Thus, Markdown's formatting syntax only addresses issues that +can be conveyed in plain text.

+ +

For any markup that is not covered by Markdown's syntax, you simply +use HTML itself. There's no need to preface it or delimit it to +indicate that you're switching from Markdown to HTML; you just use +the tags.

+ +

The only restrictions are that block-level HTML elements -- e.g. <div>, +<table>, <pre>, <p>, etc. -- must be separated from surrounding +content by blank lines, and the start and end tags of the block should +not be indented with tabs or spaces. Markdown is smart enough not +to add extra (unwanted) <p> tags around HTML block-level tags.

+ +

For example, to add an HTML table to a Markdown article:

+ +
This is a regular paragraph.
+
+<table>
+    <tr>
+        <td>Foo</td>
+    </tr>
+</table>
+
+This is another regular paragraph.
+
+ +

Note that Markdown formatting syntax is not processed within block-level +HTML tags. E.g., you can't use Markdown-style *emphasis* inside an +HTML block.

+ +

Span-level HTML tags -- e.g. <span>, <cite>, or <del> -- can be +used anywhere in a Markdown paragraph, list item, or header. If you +want, you can even use HTML tags instead of Markdown formatting; e.g. if +you'd prefer to use HTML <a> or <img> tags instead of Markdown's +link or image syntax, go right ahead.

+ +

Unlike block-level HTML tags, Markdown syntax is processed within +span-level tags.

+ +

Automatic Escaping for Special Characters

+ +

In HTML, there are two characters that demand special treatment: < +and &. Left angle brackets are used to start tags; ampersands are +used to denote HTML entities. If you want to use them as literal +characters, you must escape them as entities, e.g. &lt;, and +&amp;.

+ +

Ampersands in particular are bedeviling for web writers. If you want to +write about 'AT&T', you need to write 'AT&amp;T'. You even need to +escape ampersands within URLs. Thus, if you want to link to:

+ +
http://images.google.com/images?num=30&q=larry+bird
+
+ +

you need to encode the URL as:

+ +
http://images.google.com/images?num=30&amp;q=larry+bird
+
+ +

in your anchor tag href attribute. Needless to say, this is easy to +forget, and is probably the single most common source of HTML validation +errors in otherwise well-marked-up web sites.

+ +

Markdown allows you to use these characters naturally, taking care of +all the necessary escaping for you. If you use an ampersand as part of +an HTML entity, it remains unchanged; otherwise it will be translated +into &amp;.

+ +

So, if you want to include a copyright symbol in your article, you can write:

+ +
&copy;
+
+ +

and Markdown will leave it alone. But if you write:

+ +
AT&T
+
+ +

Markdown will translate it to:

+ +
AT&amp;T
+
+ +

Similarly, because Markdown supports inline HTML, if you use +angle brackets as delimiters for HTML tags, Markdown will treat them as +such. But if you write:

+ +
4 < 5
+
+ +

Markdown will translate it to:

+ +
4 &lt; 5
+
+ +

However, inside Markdown code spans and blocks, angle brackets and +ampersands are always encoded automatically. This makes it easy to use +Markdown to write about HTML code. (As opposed to raw HTML, which is a +terrible format for writing about HTML syntax, because every single < +and & in your example code needs to be escaped.)

+ +
+ +

Block Elements

+ +

Paragraphs and Line Breaks

+ +

A paragraph is simply one or more consecutive lines of text, separated +by one or more blank lines. (A blank line is any line that looks like a +blank line -- a line containing nothing but spaces or tabs is considered +blank.) Normal paragraphs should not be intended with spaces or tabs.

+ +

The implication of the "one or more consecutive lines of text" rule is +that Markdown supports "hard-wrapped" text paragraphs. This differs +significantly from most other text-to-HTML formatters (including Movable +Type's "Convert Line Breaks" option) which translate every line break +character in a paragraph into a <br /> tag.

+ +

When you do want to insert a <br /> break tag using Markdown, you +end a line with two or more spaces, then type return.

+ +

Yes, this takes a tad more effort to create a <br />, but a simplistic +"every line break is a <br />" rule wouldn't work for Markdown. +Markdown's email-style blockquoting and multi-paragraph list items +work best -- and look better -- when you format them with hard breaks.

+ + + +

Markdown supports two styles of headers, Setext and atx.

+ +

Setext-style headers are "underlined" using equal signs (for first-level +headers) and dashes (for second-level headers). For example:

+ +
This is an H1
+=============
+
+This is an H2
+-------------
+
+ +

Any number of underlining ='s or -'s will work.

+ +

Atx-style headers use 1-6 hash characters at the start of the line, +corresponding to header levels 1-6. For example:

+ +
# This is an H1
+
+## This is an H2
+
+###### This is an H6
+
+ +

Optionally, you may "close" atx-style headers. This is purely +cosmetic -- you can use this if you think it looks better. The +closing hashes don't even need to match the number of hashes +used to open the header. (The number of opening hashes +determines the header level.) :

+ +
# This is an H1 #
+
+## This is an H2 ##
+
+### This is an H3 ######
+
+ +

Blockquotes

+ +

Markdown uses email-style > characters for blockquoting. If you're +familiar with quoting passages of text in an email message, then you +know how to create a blockquote in Markdown. It looks best if you hard +wrap the text and put a > before every line:

+ +
> This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
+> consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
+> Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.
+> 
+> Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
+> id sem consectetuer libero luctus adipiscing.
+
+ +

Markdown allows you to be lazy and only put the > before the first +line of a hard-wrapped paragraph:

+ +
> This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
+consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
+Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.
+
+> Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
+id sem consectetuer libero luctus adipiscing.
+
+ +

Blockquotes can be nested (i.e. a blockquote-in-a-blockquote) by +adding additional levels of >:

+ +
> This is the first level of quoting.
+>
+> > This is nested blockquote.
+>
+> Back to the first level.
+
+ +

Blockquotes can contain other Markdown elements, including headers, lists, +and code blocks:

+ +
> ## This is a header.
+> 
+> 1.   This is the first list item.
+> 2.   This is the second list item.
+> 
+> Here's some example code:
+> 
+>     return shell_exec("echo $input | $markdown_script");
+
+ +

Any decent text editor should make email-style quoting easy. For +example, with BBEdit, you can make a selection and choose Increase +Quote Level from the Text menu.

+ +

Lists

+ +

Markdown supports ordered (numbered) and unordered (bulleted) lists.

+ +

Unordered lists use asterisks, pluses, and hyphens -- interchangably +-- as list markers:

+ +
*   Red
+*   Green
+*   Blue
+
+ +

is equivalent to:

+ +
+   Red
++   Green
++   Blue
+
+ +

and:

+ +
-   Red
+-   Green
+-   Blue
+
+ +

Ordered lists use numbers followed by periods:

+ +
1.  Bird
+2.  McHale
+3.  Parish
+
+ +

It's important to note that the actual numbers you use to mark the +list have no effect on the HTML output Markdown produces. The HTML +Markdown produces from the above list is:

+ +
<ol>
+<li>Bird</li>
+<li>McHale</li>
+<li>Parish</li>
+</ol>
+
+ +

If you instead wrote the list in Markdown like this:

+ +
1.  Bird
+1.  McHale
+1.  Parish
+
+ +

or even:

+ +
3. Bird
+1. McHale
+8. Parish
+
+ +

you'd get the exact same HTML output. The point is, if you want to, +you can use ordinal numbers in your ordered Markdown lists, so that +the numbers in your source match the numbers in your published HTML. +But if you want to be lazy, you don't have to.

+ +

If you do use lazy list numbering, however, you should still start the +list with the number 1. At some point in the future, Markdown may support +starting ordered lists at an arbitrary number.

+ +

List markers typically start at the left margin, but may be indented by +up to three spaces. List markers must be followed by one or more spaces +or a tab.

+ +

To make lists look nice, you can wrap items with hanging indents:

+ +
*   Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
+    Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,
+    viverra nec, fringilla in, laoreet vitae, risus.
+*   Donec sit amet nisl. Aliquam semper ipsum sit amet velit.
+    Suspendisse id sem consectetuer libero luctus adipiscing.
+
+ +

But if you want to be lazy, you don't have to:

+ +
*   Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
+Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,
+viverra nec, fringilla in, laoreet vitae, risus.
+*   Donec sit amet nisl. Aliquam semper ipsum sit amet velit.
+Suspendisse id sem consectetuer libero luctus adipiscing.
+
+ +

If list items are separated by blank lines, Markdown will wrap the +items in <p> tags in the HTML output. For example, this input:

+ +
*   Bird
+*   Magic
+
+ +

will turn into:

+ +
<ul>
+<li>Bird</li>
+<li>Magic</li>
+</ul>
+
+ +

But this:

+ +
*   Bird
+
+*   Magic
+
+ +

will turn into:

+ +
<ul>
+<li><p>Bird</p></li>
+<li><p>Magic</p></li>
+</ul>
+
+ +

List items may consist of multiple paragraphs. Each subsequent +paragraph in a list item must be intended by either 4 spaces +or one tab:

+ +
1.  This is a list item with two paragraphs. Lorem ipsum dolor
+    sit amet, consectetuer adipiscing elit. Aliquam hendrerit
+    mi posuere lectus.
+
+    Vestibulum enim wisi, viverra nec, fringilla in, laoreet
+    vitae, risus. Donec sit amet nisl. Aliquam semper ipsum
+    sit amet velit.
+
+2.  Suspendisse id sem consectetuer libero luctus adipiscing.
+
+ +

It looks nice if you indent every line of the subsequent +paragraphs, but here again, Markdown will allow you to be +lazy:

+ +
*   This is a list item with two paragraphs.
+
+    This is the second paragraph in the list item. You're
+only required to indent the first line. Lorem ipsum dolor
+sit amet, consectetuer adipiscing elit.
+
+*   Another item in the same list.
+
+ +

To put a blockquote within a list item, the blockquote's > +delimiters need to be indented:

+ +
*   A list item with a blockquote:
+
+    > This is a blockquote
+    > inside a list item.
+
+ +

To put a code block within a list item, the code block needs +to be indented twice -- 8 spaces or two tabs:

+ +
*   A list item with a code block:
+
+        <code goes here>
+
+ +

It's worth noting that it's possible to trigger an ordered list by +accident, by writing something like this:

+ +
1986. What a great season.
+
+ +

In other words, a number-period-space sequence at the beginning of a +line. To avoid this, you can backslash-escape the period:

+ +
1986\. What a great season.
+
+ +

Code Blocks

+ +

Pre-formatted code blocks are used for writing about programming or +markup source code. Rather than forming normal paragraphs, the lines +of a code block are interpreted literally. Markdown wraps a code block +in both <pre> and <code> tags.

+ +

To produce a code block in Markdown, simply indent every line of the +block by at least 4 spaces or 1 tab. For example, given this input:

+ +
This is a normal paragraph:
+
+    This is a code block.
+
+ +

Markdown will generate:

+ +
<p>This is a normal paragraph:</p>
+
+<pre><code>This is a code block.
+</code></pre>
+
+ +

One level of indentation -- 4 spaces or 1 tab -- is removed from each +line of the code block. For example, this:

+ +
Here is an example of AppleScript:
+
+    tell application "Foo"
+        beep
+    end tell
+
+ +

will turn into:

+ +
<p>Here is an example of AppleScript:</p>
+
+<pre><code>tell application "Foo"
+    beep
+end tell
+</code></pre>
+
+ +

A code block continues until it reaches a line that is not indented +(or the end of the article).

+ +

Within a code block, ampersands (&) and angle brackets (< and >) +are automatically converted into HTML entities. This makes it very +easy to include example HTML source code using Markdown -- just paste +it and indent it, and Markdown will handle the hassle of encoding the +ampersands and angle brackets. For example, this:

+ +
    <div class="footer">
+        &copy; 2004 Foo Corporation
+    </div>
+
+ +

will turn into:

+ +
<pre><code>&lt;div class="footer"&gt;
+    &amp;copy; 2004 Foo Corporation
+&lt;/div&gt;
+</code></pre>
+
+ +

Regular Markdown syntax is not processed within code blocks. E.g., +asterisks are just literal asterisks within a code block. This means +it's also easy to use Markdown to write about Markdown's own syntax.

+ +

Horizontal Rules

+ +

You can produce a horizontal rule tag (<hr />) by placing three or +more hyphens, asterisks, or underscores on a line by themselves. If you +wish, you may use spaces between the hyphens or asterisks. Each of the +following lines will produce a horizontal rule:

+ +
* * *
+
+***
+
+*****
+
+- - -
+
+---------------------------------------
+
+_ _ _
+
+ +
+ +

Span Elements

+ + + +

Markdown supports two style of links: inline and reference.

+ +

In both styles, the link text is delimited by [square brackets].

+ +

To create an inline link, use a set of regular parentheses immediately +after the link text's closing square bracket. Inside the parentheses, +put the URL where you want the link to point, along with an optional +title for the link, surrounded in quotes. For example:

+ +
This is [an example](http://example.com/ "Title") inline link.
+
+[This link](http://example.net/) has no title attribute.
+
+ +

Will produce:

+ +
<p>This is <a href="http://example.com/" title="Title">
+an example</a> inline link.</p>
+
+<p><a href="http://example.net/">This link</a> has no
+title attribute.</p>
+
+ +

If you're referring to a local resource on the same server, you can +use relative paths:

+ +
See my [About](/about/) page for details.
+
+ +

Reference-style links use a second set of square brackets, inside +which you place a label of your choosing to identify the link:

+ +
This is [an example][id] reference-style link.
+
+ +

You can optionally use a space to separate the sets of brackets:

+ +
This is [an example] [id] reference-style link.
+
+ +

Then, anywhere in the document, you define your link label like this, +on a line by itself:

+ +
[id]: http://example.com/  "Optional Title Here"
+
+ +

That is:

+ +
    +
  • Square brackets containing the link identifier (optionally +indented from the left margin using up to three spaces);
  • +
  • followed by a colon;
  • +
  • followed by one or more spaces (or tabs);
  • +
  • followed by the URL for the link;
  • +
  • optionally followed by a title attribute for the link, enclosed +in double or single quotes.
  • +
+ +

The link URL may, optionally, be surrounded by angle brackets:

+ +
[id]: <http://example.com/>  "Optional Title Here"
+
+ +

You can put the title attribute on the next line and use extra spaces +or tabs for padding, which tends to look better with longer URLs:

+ +
[id]: http://example.com/longish/path/to/resource/here
+    "Optional Title Here"
+
+ +

Link definitions are only used for creating links during Markdown +processing, and are stripped from your document in the HTML output.

+ +

Link definition names may constist of letters, numbers, spaces, and punctuation -- but they are not case sensitive. E.g. these two links:

+ +
[link text][a]
+[link text][A]
+
+ +

are equivalent.

+ +

The implicit link name shortcut allows you to omit the name of the +link, in which case the link text itself is used as the name. +Just use an empty set of square brackets -- e.g., to link the word +"Google" to the google.com web site, you could simply write:

+ +
[Google][]
+
+ +

And then define the link:

+ +
[Google]: http://google.com/
+
+ +

Because link names may contain spaces, this shortcut even works for +multiple words in the link text:

+ +
Visit [Daring Fireball][] for more information.
+
+ +

And then define the link:

+ +
[Daring Fireball]: http://daringfireball.net/
+
+ +

Link definitions can be placed anywhere in your Markdown document. I +tend to put them immediately after each paragraph in which they're +used, but if you want, you can put them all at the end of your +document, sort of like footnotes.

+ +

Here's an example of reference links in action:

+ +
I get 10 times more traffic from [Google] [1] than from
+[Yahoo] [2] or [MSN] [3].
+
+  [1]: http://google.com/        "Google"
+  [2]: http://search.yahoo.com/  "Yahoo Search"
+  [3]: http://search.msn.com/    "MSN Search"
+
+ +

Using the implicit link name shortcut, you could instead write:

+ +
I get 10 times more traffic from [Google][] than from
+[Yahoo][] or [MSN][].
+
+  [google]: http://google.com/        "Google"
+  [yahoo]:  http://search.yahoo.com/  "Yahoo Search"
+  [msn]:    http://search.msn.com/    "MSN Search"
+
+ +

Both of the above examples will produce the following HTML output:

+ +
<p>I get 10 times more traffic from <a href="http://google.com/"
+title="Google">Google</a> than from
+<a href="http://search.yahoo.com/" title="Yahoo Search">Yahoo</a>
+or <a href="http://search.msn.com/" title="MSN Search">MSN</a>.</p>
+
+ +

For comparison, here is the same paragraph written using +Markdown's inline link style:

+ +
I get 10 times more traffic from [Google](http://google.com/ "Google")
+than from [Yahoo](http://search.yahoo.com/ "Yahoo Search") or
+[MSN](http://search.msn.com/ "MSN Search").
+
+ +

The point of reference-style links is not that they're easier to +write. The point is that with reference-style links, your document +source is vastly more readable. Compare the above examples: using +reference-style links, the paragraph itself is only 81 characters +long; with inline-style links, it's 176 characters; and as raw HTML, +it's 234 characters. In the raw HTML, there's more markup than there +is text.

+ +

With Markdown's reference-style links, a source document much more +closely resembles the final output, as rendered in a browser. By +allowing you to move the markup-related metadata out of the paragraph, +you can add links without interrupting the narrative flow of your +prose.

+ +

Emphasis

+ +

Markdown treats asterisks (*) and underscores (_) as indicators of +emphasis. Text wrapped with one * or _ will be wrapped with an +HTML <em> tag; double *'s or _'s will be wrapped with an HTML +<strong> tag. E.g., this input:

+ +
*single asterisks*
+
+_single underscores_
+
+**double asterisks**
+
+__double underscores__
+
+ +

will produce:

+ +
<em>single asterisks</em>
+
+<em>single underscores</em>
+
+<strong>double asterisks</strong>
+
+<strong>double underscores</strong>
+
+ +

You can use whichever style you prefer; the lone restriction is that +the same character must be used to open and close an emphasis span.

+ +

Emphasis can be used in the middle of a word:

+ +
un*fucking*believable
+
+ +

But if you surround an * or _ with spaces, it'll be treated as a +literal asterisk or underscore.

+ +

To produce a literal asterisk or underscore at a position where it +would otherwise be used as an emphasis delimiter, you can backslash +escape it:

+ +
\*this text is surrounded by literal asterisks\*
+
+ +

Code

+ +

To indicate a span of code, wrap it with backtick quotes (`). +Unlike a pre-formatted code block, a code span indicates code within a +normal paragraph. For example:

+ +
Use the `printf()` function.
+
+ +

will produce:

+ +
<p>Use the <code>printf()</code> function.</p>
+
+ +

To include a literal backtick character within a code span, you can use +multiple backticks as the opening and closing delimiters:

+ +
``There is a literal backtick (`) here.``
+
+ +

which will produce this:

+ +
<p><code>There is a literal backtick (`) here.</code></p>
+
+ +

The backtick delimiters surrounding a code span may include spaces -- +one after the opening, one before the closing. This allows you to place +literal backtick characters at the beginning or end of a code span:

+ +
A single backtick in a code span: `` ` ``
+
+A backtick-delimited string in a code span: `` `foo` ``
+
+ +

will produce:

+ +
<p>A single backtick in a code span: <code>`</code></p>
+
+<p>A backtick-delimited string in a code span: <code>`foo`</code></p>
+
+ +

With a code span, ampersands and angle brackets are encoded as HTML +entities automatically, which makes it easy to include example HTML +tags. Markdown will turn this:

+ +
Please don't use any `<blink>` tags.
+
+ +

into:

+ +
<p>Please don't use any <code>&lt;blink&gt;</code> tags.</p>
+
+ +

You can write this:

+ +
`&#8212;` is the decimal-encoded equivalent of `&mdash;`.
+
+ +

to produce:

+ +
<p><code>&amp;#8212;</code> is the decimal-encoded
+equivalent of <code>&amp;mdash;</code>.</p>
+
+ +

Images

+ +

Admittedly, it's fairly difficult to devise a "natural" syntax for +placing images into a plain text document format.

+ +

Markdown uses an image syntax that is intended to resemble the syntax +for links, allowing for two styles: inline and reference.

+ +

Inline image syntax looks like this:

+ +
![Alt text](/path/to/img.jpg)
+
+![Alt text](/path/to/img.jpg "Optional title")
+
+ +

That is:

+ +
    +
  • An exclamation mark: !;
  • +
  • followed by a set of square brackets, containing the alt +attribute text for the image;
  • +
  • followed by a set of parentheses, containing the URL or path to +the image, and an optional title attribute enclosed in double +or single quotes.
  • +
+ +

Reference-style image syntax looks like this:

+ +
![Alt text][id]
+
+ +

Where "id" is the name of a defined image reference. Image references +are defined using syntax identical to link references:

+ +
[id]: url/to/image  "Optional title attribute"
+
+ +

As of this writing, Markdown has no syntax for specifying the +dimensions of an image; if this is important to you, you can simply +use regular HTML <img> tags.

+ +
+ +

Miscellaneous

+ + + +

Markdown supports a shortcut style for creating "automatic" links for URLs and email addresses: simply surround the URL or email address with angle brackets. What this means is that if you want to show the actual text of a URL or email address, and also have it be a clickable link, you can do this:

+ +
<http://example.com/>
+
+ +

Markdown will turn this into:

+ +
<a href="http://example.com/">http://example.com/</a>
+
+ +

Automatic links for email addresses work similarly, except that +Markdown will also perform a bit of randomized decimal and hex +entity-encoding to help obscure your address from address-harvesting +spambots. For example, Markdown will turn this:

+ +
<address@example.com>
+
+ +

into something like this:

+ +
<a href="&#x6D;&#x61;i&#x6C;&#x74;&#x6F;:&#x61;&#x64;&#x64;&#x72;&#x65;
+&#115;&#115;&#64;&#101;&#120;&#x61;&#109;&#x70;&#x6C;e&#x2E;&#99;&#111;
+&#109;">&#x61;&#x64;&#x64;&#x72;&#x65;&#115;&#115;&#64;&#101;&#120;&#x61;
+&#109;&#x70;&#x6C;e&#x2E;&#99;&#111;&#109;</a>
+
+ +

which will render in a browser as a clickable link to "address@example.com".

+ +

(This sort of entity-encoding trick will indeed fool many, if not +most, address-harvesting bots, but it definitely won't fool all of +them. It's better than nothing, but an address published in this way +will probably eventually start receiving spam.)

+ +

Backslash Escapes

+ +

Markdown allows you to use backslash escapes to generate literal +characters which would otherwise have special meaning in Markdown's +formatting syntax. For example, if you wanted to surround a word with +literal asterisks (instead of an HTML <em> tag), you can backslashes +before the asterisks, like this:

+ +
\*literal asterisks\*
+
+ +

Markdown provides backslash escapes for the following characters:

+ +
\   backslash
+`   backtick
+*   asterisk
+_   underscore
+{}  curly braces
+[]  square brackets
+()  parentheses
+#   hash mark
++   plus sign
+-   minus sign (hyphen)
+.   dot
+!   exclamation mark
+
+]] + +markdown_nested_blockquotes = [[ +> foo +> +> > bar +> +> foo +~ +
+

foo

+ +
+

bar

+
+ +

foo

+
+]] + +markdown_ordered_and_unordered_lists = [[ +## Unordered + +Asterisks tight: + +* asterisk 1 +* asterisk 2 +* asterisk 3 + + +Asterisks loose: + +* asterisk 1 + +* asterisk 2 + +* asterisk 3 + +* * * + +Pluses tight: + ++ Plus 1 ++ Plus 2 ++ Plus 3 + + +Pluses loose: + ++ Plus 1 + ++ Plus 2 + ++ Plus 3 + +* * * + + +Minuses tight: + +- Minus 1 +- Minus 2 +- Minus 3 + + +Minuses loose: + +- Minus 1 + +- Minus 2 + +- Minus 3 + + +## Ordered + +Tight: + +1. First +2. Second +3. Third + +and: + +1. One +2. Two +3. Three + + +Loose using tabs: + +1. First + +2. Second + +3. Third + +and using spaces: + +1. One + +2. Two + +3. Three + +Multiple paragraphs: + +1. Item 1, graf one. + + Item 2. graf two. The quick brown fox jumped over the lazy dog's + back. + +2. Item 2. + +3. Item 3. + + + +## Nested + +* Tab + * Tab + * Tab + +Here's another: + +1. First +2. Second: + * Fee + * Fie + * Foe +3. Third + +Same thing but with paragraphs: + +1. First + +2. Second: + * Fee + * Fie + * Foe + +3. Third +~ +

Unordered

+ +

Asterisks tight:

+ +
    +
  • asterisk 1
  • +
  • asterisk 2
  • +
  • asterisk 3
  • +
+ +

Asterisks loose:

+ +
    +
  • asterisk 1

  • +
  • asterisk 2

  • +
  • asterisk 3

  • +
+ +
+ +

Pluses tight:

+ +
    +
  • Plus 1
  • +
  • Plus 2
  • +
  • Plus 3
  • +
+ +

Pluses loose:

+ +
    +
  • Plus 1

  • +
  • Plus 2

  • +
  • Plus 3

  • +
+ +
+ +

Minuses tight:

+ +
    +
  • Minus 1
  • +
  • Minus 2
  • +
  • Minus 3
  • +
+ +

Minuses loose:

+ +
    +
  • Minus 1

  • +
  • Minus 2

  • +
  • Minus 3

  • +
+ +

Ordered

+ +

Tight:

+ +
    +
  1. First
  2. +
  3. Second
  4. +
  5. Third
  6. +
+ +

and:

+ +
    +
  1. One
  2. +
  3. Two
  4. +
  5. Three
  6. +
+ +

Loose using tabs:

+ +
    +
  1. First

  2. +
  3. Second

  4. +
  5. Third

  6. +
+ +

and using spaces:

+ +
    +
  1. One

  2. +
  3. Two

  4. +
  5. Three

  6. +
+ +

Multiple paragraphs:

+ +
    +
  1. Item 1, graf one.

    + +

    Item 2. graf two. The quick brown fox jumped over the lazy dog's +back.

  2. +
  3. Item 2.

  4. +
  5. Item 3.

  6. +
+ +

Nested

+ +
    +
  • Tab +
      +
    • Tab +
        +
      • Tab
      • +
    • +
  • +
+ +

Here's another:

+ +
    +
  1. First
  2. +
  3. Second: +
      +
    • Fee
    • +
    • Fie
    • +
    • Foe
    • +
  4. +
  5. Third
  6. +
+ +

Same thing but with paragraphs:

+ +
    +
  1. First

  2. +
  3. Second:

    + +
      +
    • Fee
    • +
    • Fie
    • +
    • Foe
    • +
  4. +
  5. Third

  6. +
+]] + +markdown_strong_and_em_together = [[ +***This is strong and em.*** + +So is ***this*** word. + +___This is strong and em.___ + +So is ___this___ word. +~ +

This is strong and em.

+ +

So is this word.

+ +

This is strong and em.

+ +

So is this word.

+]] + +markdown_tabs = [[ ++ this is a list item + indented with tabs + ++ this is a list item + indented with spaces + +Code: + + this code block is indented by one tab + +And: + + this code block is indented by two tabs + +And: + + + this is an example list item + indented with tabs + + + this is an example list item + indented with spaces +~ +
    +
  • this is a list item +indented with tabs

  • +
  • this is a list item +indented with spaces

  • +
+ +

Code:

+ +
this code block is indented by one tab
+
+ +

And:

+ +
    this code block is indented by two tabs
+
+ +

And:

+ +
+   this is an example list item
+    indented with tabs
+
++   this is an example list item
+    indented with spaces
+
+]] + +markdown_tidyness = [[ +> A list within a blockquote: +> +> * asterisk 1 +> * asterisk 2 +> * asterisk 3 +~ +
+

A list within a blockquote:

+
    +
  • asterisk 1
  • +
  • asterisk 2
  • +
  • asterisk 3
  • +
+
+]] + +bug_from_paul_chiusano_gmail_com = [[ +[context-free grammar][CFG] +[notated][BNF] +[unrestricted grammar][] + +[CFG]: +[BNF]: +[unrestricted grammar]: +~ +

context-free grammar
+notated
+unrestricted grammar

+~ +*`foo()`* +~ +

foo()

+]] + +links = [[ +[the_dog](the_dog.html) +~ +

the_dog

+~ +[the\_dog\_bones](bones.html) +~ +

the_dog_bones

+~ +[bones](the_dog_bones.html) +~ +

bones

+~ +![the_dog](the_dog.jpg) +~ +

the_dog

+~ +![the_dog_bones](bones.jpg) +~ +

the_dog_bones

+~ +![bones](the_dog_bones.jpg) +~ +

bones

+~ +_Kalle_ +~ +

Kalle

+]] + +bugs = [==[ +[the dog](the%20dog.html) +~ +

the dog

+~ +min(a,b)<=xmin(a,b)<=x +~ +![this "should" be fixed & escaped](the_dog_bones.jpg) +~ +

this "should" be fixed & escaped

+~ +![this "should" be fixed & escaped < > '](the_dog_bones.jpg) +~ +

this "should" be fixed & escaped < > '

+~ +_**strong**_ **_stuff_** +~ +

strong stuff

+~ +
%2B19035551212
+~ +
%2B19035551212
+~ +_This article is about the simulation language. For the specification language, see [Z notation][1]._ +~ +

This article is about the simulation language. For the specification language, see [Z notation][1].

+~ +**[Coat of Arms][3]** + +[3]: Polish_heraldry (Polish heraldry) +~ +

Coat of Arms

+~ +_**![][18]Wikimedia Atlas of Mayotte** [![][19]][20]._ + +[18]: mayotte/file.1x1white.gif (This image is not present because of licensing restrictions) +[19]: mayotte/file.90.png +[20]: mayotte/file.9955.png +~ +

Wikimedia Atlas of Mayotte .

+~ +"Grant me chastity and continence, but not yet" [da mihi castitatem et continentiam, sed noli modo] (Conf., VIII. vii (17)). +~ +

"Grant me chastity and continence, but not yet" [da mihi castitatem et continentiam, sed noli modo] (Conf., VIII. vii (17)).

+~ +0011 1100 +60 +3C +< +~ +

0011 1100 +60 +3C +<

+~ +The _**Peterborough Chronicle**_ (also called the _**Laud Manuscript**_), one of the _Anglo-Saxon Chronicles_, contains +~ +

The Peterborough Chronicle (also called the Laud Manuscript), one of the Anglo-Saxon Chronicles, contains

+~ +foo\_ok_ +~ +

foo_ok_

+~ +[Atom][103] + +[103]: http://en.wikipedia.org/wiki/Atom_(standard) +~ +

Atom

+~ +_I don't find (y - 1) * max_x + x to be any less readable than y * max_x + x --TimothyDowns[?][5]_ + +[5]: http://lua-users.org/cgi-bin/wiki.pl?action=edit&id=TimothyDowns +~ +

I don't find (y - 1) * maxx + x to be any less readable than y * maxx + x --TimothyDowns?

+~ +1. One + + # alpha + # beta +~ +
    +
  1. One

    + +
    # alpha
    +# beta
    +
  2. +
+~ +This is [an example][id] reference-style link. + +[id]: http://example1.com/ "Optional Title Here" + +Other text. + +This is [an example] [id] reference-style link. +[id]: http://example2.com/ +~ +

This is an example reference-style link.

+ +

Other text.

+ +

This is an example reference-style link.

+~ +- Idiomatic + +[lulu]: lulu/file.lulu.png + +[50]: http://www.lua.org/manual/5.1/ +~ +
    +
  • Idiomatic
  • +
+~ +[Wesel - Boxtel [KBS 134]](kbs/wes-box-134.md.lp) +~ +

Wesel - Boxtel [KBS 134]

+]==] + +-- Unhandled test: _M*A*S*H_ + +setfenv(1, _G) + +-- Test running function +local function run_tests() + -- Do any
 sequences in s1 and s2 match up perfectly?
+	local function pre_equal(s1, s2)
+		local pre = {}
+		for p in s1:gfind("
.-
") do + pre[#pre+1] = p + end + for p in s2:gfind("
.-
") do + if p ~= pre[1] then return false end + table.remove(pre, 1) + end + return #pre == 0 + end + + -- Are s1 and s2 equal except for whitespace issues. + local function nonspace_equal(s1, s2) + s1 = s1:gsub("[ \t\n\r]", "") + s2 = s2:gsub("[ \t\n\r]", "") + return s1 == s2 + end + + -- Runs a single test + local function run_single_test(name, source, desired) + local result = markdown(source) + local res = pre_equal(result, desired) and nonspace_equal(result, desired) + if not res and not quiet_mode then + print("********** TEST FAILED **********") + print(name) + print("----- Input:") + print(source) + print("----- Expected output:") + print(desired) + print("----- Actual output:") + print(result) + print("*********************************") + print() + end + return res + end + + -- Runs a specified test battery + local function run_test(name, code) + local failed, succeeded = 0,0 + local data = split(code, "\n~[ \t]*\n") + for i = 1, #data, 2 do + if run_single_test(name, data[i], data[i+1]) then + succeeded = succeeded + 1 + else + failed = failed + 1 + end + end + if failed>0 and not quiet_mode then + print(string.format("%-20s %15s %5i %15s %5i", name, "Succeeded:", succeeded, "Failed:", failed)) + end + return failed + end + + total_failed = 0 + for _,k in ipairs(TESTS._indices) do + total_failed = total_failed + run_test(k,TESTS[k]) + end + if total_failed > 0 then + os.exit(-1) + end +end + +for i=1,#arg do + if arg[i] == "-q" then + quiet_mode = true + end +end + +run_tests() \ No newline at end of file diff --git a/markdown.lua b/markdown.lua new file mode 100644 index 0000000..5181dd8 --- /dev/null +++ b/markdown.lua @@ -0,0 +1,1359 @@ +#!/usr/bin/env lua + +--[[ +# markdown.lua -- version 0.32 + + + +**Author:** Niklas Frykholm, +**Date:** 31 May 2008 + +This is an implementation of the popular text markup language Markdown in pure Lua. +Markdown can convert documents written in a simple and easy to read text format +to well-formatted HTML. For a more thourough description of Markdown and the Markdown +syntax, see . + +The original Markdown source is written in Perl and makes heavy use of advanced +regular expression techniques (such as negative look-ahead, etc) which are not available +in Lua's simple regex engine. Therefore this Lua port has been rewritten from the ground +up. It is probably not completely bug free. If you notice any bugs, please report them to +me. A unit test that exposes the error is helpful. + +## Usage + + require "markdown" + markdown(source) + +``markdown.lua`` exposes a single global function named ``markdown(s)`` which applies the +Markdown transformation to the specified string. + +``markdown.lua`` can also be used directly from the command line: + + lua markdown.lua test.md + +Creates a file ``test.html`` with the converted content of ``test.md``. Run: + + lua markdown.lua -h + +For a description of the command-line options. + +``markdown.lua`` uses the same license as Lua, the MIT license. + +## License + +Copyright © 2008 Niklas Frykholm. + +Permission is hereby granted, free of charge, to any person obtaining a copy of this +software and associated documentation files (the "Software"), to deal in the Software +without restriction, including without limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons +to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies +or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +## Version history + +- **0.32** -- 31 May 2008 + - Fix for links containing brackets +- **0.31** -- 1 Mar 2008 + - Fix for link definitions followed by spaces +- **0.30** -- 25 Feb 2008 + - Consistent behavior with Markdown when the same link reference is reused +- **0.29** -- 24 Feb 2008 + - Fix for
 blocks with spaces in them
+-	**0.28** -- 18 Feb 2008
+	-	Fix for link encoding
+-	**0.27** -- 14 Feb 2008
+	-	Fix for link database links with ()
+-	**0.26** -- 06 Feb 2008
+	-	Fix for nested italic and bold markers
+-	**0.25** -- 24 Jan 2008
+	-	Fix for encoding of naked <
+-	**0.24** -- 21 Jan 2008
+	-	Fix for link behavior.
+-	**0.23** -- 10 Jan 2008
+	-	Fix for a regression bug in longer expressions in italic or bold.
+-	**0.22** -- 27 Dec 2007
+	-	Fix for crash when processing blocks with a percent sign in them.
+-	**0.21** -- 27 Dec 2007
+	- 	Fix for combined strong and emphasis tags
+-	**0.20** -- 13 Oct 2007
+	-	Fix for < as well in image titles, now matches Dingus behavior
+-	**0.19** -- 28 Sep 2007
+	-	Fix for quotation marks " and ampersands & in link and image titles.
+-	**0.18** -- 28 Jul 2007
+	-	Does not crash on unmatched tags (behaves like standard markdown)
+-	**0.17** -- 12 Apr 2007
+	-	Fix for links with %20 in them.
+-	**0.16** -- 12 Apr 2007
+	-	Do not require arg global to exist.
+-	**0.15** -- 28 Aug 2006
+	-	Better handling of links with underscores in them.
+-	**0.14** -- 22 Aug 2006
+	-	Bug for *`foo()`*
+-	**0.13** -- 12 Aug 2006
+	-	Added -l option for including stylesheet inline in document.
+	-	Fixed bug in -s flag.
+	-	Fixed emphasis bug.
+-	**0.12** -- 15 May 2006
+	-	Fixed several bugs to comply with MarkdownTest 1.0 
+-	**0.11** -- 12 May 2006
+	-	Fixed bug for escaping `*` and `_` inside code spans.
+	-	Added license terms.
+	-	Changed join() to table.concat().
+-	**0.10** -- 3 May 2006
+	-	Initial public release.
+
+// Niklas
+]]
+
+
+-- Set up a table for holding local functions to avoid polluting the global namespace
+local M = {}
+local MT = {__index = _G}
+setmetatable(M, MT)
+setfenv(1, M)
+
+----------------------------------------------------------------------
+-- Utility functions
+----------------------------------------------------------------------
+
+-- Locks table t from changes, writes an error if someone attempts to change the table.
+-- This is useful for detecting variables that have "accidently" been made global. Something
+-- I tend to do all too much.
+function lock(t)
+	function lock_new_index(t, k, v)
+		error("module has been locked -- " .. k .. " must be declared local", 2)
+	end
+
+	local mt = {__newindex = lock_new_index}
+	if getmetatable(t) then mt.__index = getmetatable(t).__index end
+	setmetatable(t, mt)
+end
+
+-- Returns the result of mapping the values in table t through the function f
+function map(t, f)
+	local out = {}
+	for k,v in pairs(t) do out[k] = f(v,k) end
+	return out
+end
+
+-- The identity function, useful as a placeholder.
+function identity(text) return text end
+
+-- Functional style if statement. (NOTE: no short circuit evaluation)
+function iff(t, a, b) if t then return a else return b end end
+
+-- Splits the text into an array of separate lines.
+function split(text, sep)
+	sep = sep or "\n"
+	local lines = {}
+	local pos = 1
+	while true do
+		local b,e = text:find(sep, pos)
+		if not b then table.insert(lines, text:sub(pos)) break end
+		table.insert(lines, text:sub(pos, b-1))
+		pos = e + 1
+	end
+	return lines
+end
+
+-- Converts tabs to spaces
+function detab(text)
+	local tab_width = 4
+	local function rep(match)
+		local spaces = -match:len()
+		while spaces<1 do spaces = spaces + tab_width end
+		return match .. string.rep(" ", spaces)
+	end
+	text = text:gsub("([^\n]-)\t", rep)
+	return text
+end
+
+-- Applies string.find for every pattern in the list and returns the first match
+function find_first(s, patterns, index)
+	local res = {}
+	for _,p in ipairs(patterns) do
+		local match = {s:find(p, index)}
+		if #match>0 and (#res==0 or match[1] < res[1]) then res = match end
+	end
+	return unpack(res)
+end
+
+-- If a replacement array is specified, the range [start, stop] in the array is replaced
+-- with the replacement array and the resulting array is returned. Without a replacement
+-- array the section of the array between start and stop is returned.
+function splice(array, start, stop, replacement)
+	if replacement then
+		local n = stop - start + 1
+		while n > 0 do
+			table.remove(array, start)
+			n = n - 1
+		end
+		for i,v in ipairs(replacement) do
+			table.insert(array, start, v)
+		end
+		return array
+	else
+		local res = {}
+		for i = start,stop do
+			table.insert(res, array[i])
+		end
+		return res
+	end
+end
+
+-- Outdents the text one step.
+function outdent(text)
+	text = "\n" .. text
+	text = text:gsub("\n  ? ? ?", "\n")
+	text = text:sub(2)
+	return text
+end
+
+-- Indents the text one step.
+function indent(text)
+	text = text:gsub("\n", "\n    ")
+	return text
+end
+
+-- Does a simple tokenization of html data. Returns the data as a list of tokens. 
+-- Each token is a table with a type field (which is either "tag" or "text") and
+-- a text field (which contains the original token data).
+function tokenize_html(html)
+	local tokens = {}
+	local pos = 1
+	while true do
+		local start = find_first(html, {"", start)
+		elseif html:match("^<%?", start) then
+			_,stop = html:find("?>", start)
+		else
+			_,stop = html:find("%b<>", start)
+		end
+		if not stop then
+			-- error("Could not match html tag " .. html:sub(start,start+30)) 
+		 	table.insert(tokens, {type="text", text=html:sub(start, start)})
+			pos = start + 1
+		else
+			table.insert(tokens, {type="tag", text=html:sub(start, stop)})
+			pos = stop + 1
+		end
+	end
+	return tokens
+end
+
+----------------------------------------------------------------------
+-- Hash
+----------------------------------------------------------------------
+
+-- This is used to "hash" data into alphanumeric strings that are unique
+-- in the document. (Note that this is not cryptographic hash, the hash
+-- function is not one-way.) The hash procedure is used to protect parts
+-- of the document from further processing.
+
+local HASH = {
+	-- Has the hash been inited.
+	inited = false,
+	
+	-- The unique string prepended to all hash values. This is to ensure
+	-- that hash values do not accidently coincide with an actual existing
+	-- string in the document.
+	identifier = "",
+	
+	-- Counter that counts up for each new hash instance.
+	counter = 0,
+	
+	-- Hash table.
+	table = {}
+}
+
+-- Inits hashing. Creates a hash_identifier that doesn't occur anywhere
+-- in the text.
+function init_hash(text)
+	HASH.inited = true
+	HASH.identifier = ""
+	HASH.counter = 0
+	HASH.table = {}
+	
+	local s = "HASH"
+	local counter = 0
+	local id
+	while true do
+		id  = s .. counter
+		if not text:find(id, 1, true) then break end
+		counter = counter + 1
+	end
+	HASH.identifier = id
+end
+
+-- Returns the hashed value for s.
+function hash(s)
+	assert(HASH.inited)
+	if not HASH.table[s] then
+		HASH.counter = HASH.counter + 1
+		local id = HASH.identifier .. HASH.counter .. "X"
+		HASH.table[s] = id
+	end
+	return HASH.table[s]
+end
+
+----------------------------------------------------------------------
+-- Protection
+----------------------------------------------------------------------
+
+-- The protection module is used to "protect" parts of a document
+-- so that they are not modified by subsequent processing steps. 
+-- Protected parts are saved in a table for later unprotection
+
+-- Protection data
+local PD = {
+	-- Saved blocks that have been converted
+	blocks = {},
+
+	-- Block level tags that will be protected
+	tags = {"p", "div", "h1", "h2", "h3", "h4", "h5", "h6", "blockquote",
+	"pre", "table", "dl", "ol", "ul", "script", "noscript", "form", "fieldset",
+	"iframe", "math", "ins", "del"}
+}
+
+-- Pattern for matching a block tag that begins and ends in the leftmost
+-- column and may contain indented subtags, i.e.
+-- 
+-- A nested block. +--
+-- Nested data. +--
+--
+function block_pattern(tag) + return "\n<" .. tag .. ".-\n[ \t]*\n" +end + +-- Pattern for matching a block tag that begins and ends with a newline +function line_pattern(tag) + return "\n<" .. tag .. ".-[ \t]*\n" +end + +-- Protects the range of characters from start to stop in the text and +-- returns the protected string. +function protect_range(text, start, stop) + local s = text:sub(start, stop) + local h = hash(s) + PD.blocks[h] = s + text = text:sub(1,start) .. h .. text:sub(stop) + return text +end + +-- Protect every part of the text that matches any of the patterns. The first +-- matching pattern is protected first, etc. +function protect_matches(text, patterns) + while true do + local start, stop = find_first(text, patterns) + if not start then break end + text = protect_range(text, start, stop) + end + return text +end + +-- Protects blocklevel tags in the specified text +function protect(text) + -- First protect potentially nested block tags + text = protect_matches(text, map(PD.tags, block_pattern)) + -- Then protect block tags at the line level. + text = protect_matches(text, map(PD.tags, line_pattern)) + -- Protect
and comment tags + text = protect_matches(text, {"\n]->[ \t]*\n"}) + text = protect_matches(text, {"\n[ \t]*\n"}) + return text +end + +-- Returns true if the string s is a hash resulting from protection +function is_protected(s) + return PD.blocks[s] +end + +-- Unprotects the specified text by expanding all the nonces +function unprotect(text) + for k,v in pairs(PD.blocks) do + v = v:gsub("%%", "%%%%") + text = text:gsub(k, v) + end + return text +end + + +---------------------------------------------------------------------- +-- Block transform +---------------------------------------------------------------------- + +-- The block transform functions transform the text on the block level. +-- They work with the text as an array of lines rather than as individual +-- characters. + +-- Returns true if the line is a ruler of (char) characters. +-- The line must contain at least three char characters and contain only spaces and +-- char characters. +function is_ruler_of(line, char) + if not line:match("^[ %" .. char .. "]*$") then return false end + if not line:match("%" .. char .. ".*%" .. char .. ".*%" .. char) then return false end + return true +end + +-- Identifies the block level formatting present in the line +function classify(line) + local info = {line = line, text = line} + + if line:match("^ ") then + info.type = "indented" + info.outdented = line:sub(5) + return info + end + + for _,c in ipairs({'*', '-', '_', '='}) do + if is_ruler_of(line, c) then + info.type = "ruler" + info.ruler_char = c + return info + end + end + + if line == "" then + info.type = "blank" + return info + end + + if line:match("^(#+)[ \t]*(.-)[ \t]*#*[ \t]*$") then + local m1, m2 = line:match("^(#+)[ \t]*(.-)[ \t]*#*[ \t]*$") + info.type = "header" + info.level = m1:len() + info.text = m2 + return info + end + + if line:match("^ ? ? ?(%d+)%.[ \t]+(.+)") then + local number, text = line:match("^ ? ? ?(%d+)%.[ \t]+(.+)") + info.type = "list_item" + info.list_type = "numeric" + info.number = 0 + number + info.text = text + return info + end + + if line:match("^ ? ? ?([%*%+%-])[ \t]+(.+)") then + local bullet, text = line:match("^ ? ? ?([%*%+%-])[ \t]+(.+)") + info.type = "list_item" + info.list_type = "bullet" + info.bullet = bullet + info.text= text + return info + end + + if line:match("^>[ \t]?(.*)") then + info.type = "blockquote" + info.text = line:match("^>[ \t]?(.*)") + return info + end + + if is_protected(line) then + info.type = "raw" + info.html = unprotect(line) + return info + end + + info.type = "normal" + return info +end + +-- Find headers constisting of a normal line followed by a ruler and converts them to +-- header entries. +function headers(array) + local i = 1 + while i <= #array - 1 do + if array[i].type == "normal" and array[i+1].type == "ruler" and + (array[i+1].ruler_char == "-" or array[i+1].ruler_char == "=") then + local info = {line = array[i].line} + info.text = info.line + info.type = "header" + info.level = iff(array[i+1].ruler_char == "=", 1, 2) + table.remove(array, i+1) + array[i] = info + end + i = i + 1 + end + return array +end + +-- Find list blocks and convert them to protected data blocks +function lists(array, sublist) + local function process_list(arr) + local function any_blanks(arr) + for i = 1, #arr do + if arr[i].type == "blank" then return true end + end + return false + end + + local function split_list_items(arr) + local acc = {arr[1]} + local res = {} + for i=2,#arr do + if arr[i].type == "list_item" then + table.insert(res, acc) + acc = {arr[i]} + else + table.insert(acc, arr[i]) + end + end + table.insert(res, acc) + return res + end + + local function process_list_item(lines, block) + while lines[#lines].type == "blank" do + table.remove(lines) + end + + local itemtext = lines[1].text + for i=2,#lines do + itemtext = itemtext .. "\n" .. outdent(lines[i].line) + end + if block then + itemtext = block_transform(itemtext, true) + if not itemtext:find("
") then itemtext = indent(itemtext) end
+				return "    
  • " .. itemtext .. "
  • " + else + local lines = split(itemtext) + lines = map(lines, classify) + lines = lists(lines, true) + lines = blocks_to_html(lines, true) + itemtext = table.concat(lines, "\n") + if not itemtext:find("
    ") then itemtext = indent(itemtext) end
    +				return "    
  • " .. itemtext .. "
  • " + end + end + + local block_list = any_blanks(arr) + local items = split_list_items(arr) + local out = "" + for _, item in ipairs(items) do + out = out .. process_list_item(item, block_list) .. "\n" + end + if arr[1].list_type == "numeric" then + return "
      \n" .. out .. "
    " + else + return "
      \n" .. out .. "
    " + end + end + + -- Finds the range of lines composing the first list in the array. A list + -- starts with (^ list_item) or (blank list_item) and ends with + -- (blank* $) or (blank normal). + -- + -- A sublist can start with just (list_item) does not need a blank... + local function find_list(array, sublist) + local function find_list_start(array, sublist) + if array[1].type == "list_item" then return 1 end + if sublist then + for i = 1,#array do + if array[i].type == "list_item" then return i end + end + else + for i = 1, #array-1 do + if array[i].type == "blank" and array[i+1].type == "list_item" then + return i+1 + end + end + end + return nil + end + local function find_list_end(array, start) + local pos = #array + for i = start, #array-1 do + if array[i].type == "blank" and array[i+1].type ~= "list_item" + and array[i+1].type ~= "indented" and array[i+1].type ~= "blank" then + pos = i-1 + break + end + end + while pos > start and array[pos].type == "blank" do + pos = pos - 1 + end + return pos + end + + local start = find_list_start(array, sublist) + if not start then return nil end + return start, find_list_end(array, start) + end + + while true do + local start, stop = find_list(array, sublist) + if not start then break end + local text = process_list(splice(array, start, stop)) + local info = { + line = text, + type = "raw", + html = text + } + array = splice(array, start, stop, {info}) + end + + -- Convert any remaining list items to normal + for _,line in ipairs(array) do + if line.type == "list_item" then line.type = "normal" end + end + + return array +end + +-- Find and convert blockquote markers. +function blockquotes(lines) + local function find_blockquote(lines) + local start + for i,line in ipairs(lines) do + if line.type == "blockquote" then + start = i + break + end + end + if not start then return nil end + + local stop = #lines + for i = start+1, #lines do + if lines[i].type == "blank" or lines[i].type == "blockquote" then + elseif lines[i].type == "normal" then + if lines[i-1].type == "blank" then stop = i-1 break end + else + stop = i-1 break + end + end + while lines[stop].type == "blank" do stop = stop - 1 end + return start, stop + end + + local function process_blockquote(lines) + local raw = lines[1].text + for i = 2,#lines do + raw = raw .. "\n" .. lines[i].text + end + local bt = block_transform(raw) + if not bt:find("
    ") then bt = indent(bt) end
    +		return "
    \n " .. bt .. + "\n
    " + end + + while true do + local start, stop = find_blockquote(lines) + if not start then break end + local text = process_blockquote(splice(lines, start, stop)) + local info = { + line = text, + type = "raw", + html = text + } + lines = splice(lines, start, stop, {info}) + end + return lines +end + +-- Find and convert codeblocks. +function codeblocks(lines) + local function find_codeblock(lines) + local start + for i,line in ipairs(lines) do + if line.type == "indented" then start = i break end + end + if not start then return nil end + + local stop = #lines + for i = start+1, #lines do + if lines[i].type ~= "indented" and lines[i].type ~= "blank" then + stop = i-1 + break + end + end + while lines[stop].type == "blank" do stop = stop - 1 end + return start, stop + end + + local function process_codeblock(lines) + local raw = detab(encode_code(outdent(lines[1].line))) + for i = 2,#lines do + raw = raw .. "\n" .. detab(encode_code(outdent(lines[i].line))) + end + return "
    " .. raw .. "\n
    " + end + + while true do + local start, stop = find_codeblock(lines) + if not start then break end + local text = process_codeblock(splice(lines, start, stop)) + local info = { + line = text, + type = "raw", + html = text + } + lines = splice(lines, start, stop, {info}) + end + return lines +end + +-- Convert lines to html code +function blocks_to_html(lines, no_paragraphs) + local out = {} + local i = 1 + while i <= #lines do + local line = lines[i] + if line.type == "ruler" then + table.insert(out, "
    ") + elseif line.type == "raw" then + table.insert(out, line.html) + elseif line.type == "normal" then + local s = line.line + + while i+1 <= #lines and lines[i+1].type == "normal" do + i = i + 1 + s = s .. "\n" .. lines[i].line + end + + if no_paragraphs then + table.insert(out, span_transform(s)) + else + table.insert(out, "

    " .. span_transform(s) .. "

    ") + end + elseif line.type == "header" then + local s = "" .. span_transform(line.text) .. "" + table.insert(out, s) + else + table.insert(out, line.line) + end + i = i + 1 + end + return out +end + +-- Perform all the block level transforms +function block_transform(text, sublist) + local lines = split(text) + lines = map(lines, classify) + lines = headers(lines) + lines = lists(lines, sublist) + lines = codeblocks(lines) + lines = blockquotes(lines) + lines = blocks_to_html(lines) + local text = table.concat(lines, "\n") + return text +end + +-- Debug function for printing a line array to see the result +-- of partial transforms. +function print_lines(lines) + for i, line in ipairs(lines) do + print(i, line.type, line.text or line.line) + end +end + +---------------------------------------------------------------------- +-- Span transform +---------------------------------------------------------------------- + +-- Functions for transforming the text at the span level. + +-- These characters may need to be escaped because they have a special +-- meaning in markdown. +escape_chars = "'\\`*_{}[]()>#+-.!'" +escape_table = {} + +function init_escape_table() + escape_table = {} + for i = 1,#escape_chars do + local c = escape_chars:sub(i,i) + escape_table[c] = hash(c) + end +end + +-- Adds a new escape to the escape table. +function add_escape(text) + if not escape_table[text] then + escape_table[text] = hash(text) + end + return escape_table[text] +end + +-- Escape characters that should not be disturbed by markdown. +function escape_special_chars(text) + local tokens = tokenize_html(text) + + local out = "" + for _, token in ipairs(tokens) do + local t = token.text + if token.type == "tag" then + -- In tags, encode * and _ so they don't conflict with their use in markdown. + t = t:gsub("%*", escape_table["*"]) + t = t:gsub("%_", escape_table["_"]) + else + t = encode_backslash_escapes(t) + end + out = out .. t + end + return out +end + +-- Encode backspace-escaped characters in the markdown source. +function encode_backslash_escapes(t) + for i=1,escape_chars:len() do + local c = escape_chars:sub(i,i) + t = t:gsub("\\%" .. c, escape_table[c]) + end + return t +end + +-- Unescape characters that have been encoded. +function unescape_special_chars(t) + local tin = t + for k,v in pairs(escape_table) do + k = k:gsub("%%", "%%%%") + t = t:gsub(v,k) + end + if t ~= tin then t = unescape_special_chars(t) end + return t +end + +-- Encode/escape certain characters inside Markdown code runs. +-- The point is that in code, these characters are literals, +-- and lose their special Markdown meanings. +function encode_code(s) + s = s:gsub("%&", "&") + s = s:gsub("<", "<") + s = s:gsub(">", ">") + for k,v in pairs(escape_table) do + s = s:gsub("%"..k, v) + end + return s +end + +-- Handle backtick blocks. +function code_spans(s) + s = s:gsub("\\\\", escape_table["\\"]) + s = s:gsub("\\`", escape_table["`"]) + + local pos = 1 + while true do + local start, stop = s:find("`+", pos) + if not start then return s end + local count = stop - start + 1 + -- Find a matching numbert of backticks + local estart, estop = s:find(string.rep("`", count), stop+1) + local brstart = s:find("\n", stop+1) + if estart and (not brstart or estart < brstart) then + local code = s:sub(stop+1, estart-1) + code = code:gsub("^[ \t]+", "") + code = code:gsub("[ \t]+$", "") + code = code:gsub(escape_table["\\"], escape_table["\\"] .. escape_table["\\"]) + code = code:gsub(escape_table["`"], escape_table["\\"] .. escape_table["`"]) + code = "" .. encode_code(code) .. "" + code = add_escape(code) + s = s:sub(1, start-1) .. code .. s:sub(estop+1) + pos = start + code:len() + else + pos = stop + 1 + end + end + return s +end + +-- Encode alt text... enodes &, and ". +function encode_alt(s) + if not s then return s end + s = s:gsub('&', '&') + s = s:gsub('"', '"') + s = s:gsub('<', '<') + return s +end + +-- Handle image references +function images(text) + local function reference_link(alt, id) + alt = encode_alt(alt:match("%b[]"):sub(2,-2)) + id = id:match("%[(.*)%]"):lower() + if id == "" then id = text:lower() end + link_database[id] = link_database[id] or {} + if not link_database[id].url then return nil end + local url = link_database[id].url or id + url = encode_alt(url) + local title = encode_alt(link_database[id].title) + if title then title = " title=\"" .. title .. "\"" else title = "" end + return add_escape ('' .. alt .. '") + end + + local function inline_link(alt, link) + alt = encode_alt(alt:match("%b[]"):sub(2,-2)) + local url, title = link:match("%(?[ \t]*['\"](.+)['\"]") + url = url or link:match("%(?%)") + url = encode_alt(url) + title = encode_alt(title) + if title then + return add_escape('' .. alt .. '') + else + return add_escape('' .. alt .. '') + end + end + + text = text:gsub("!(%b[])[ \t]*\n?[ \t]*(%b[])", reference_link) + text = text:gsub("!(%b[])(%b())", inline_link) + return text +end + +-- Handle anchor references +function anchors(text) + local function reference_link(text, id) + text = text:match("%b[]"):sub(2,-2) + id = id:match("%b[]"):sub(2,-2):lower() + if id == "" then id = text:lower() end + link_database[id] = link_database[id] or {} + if not link_database[id].url then return nil end + local url = link_database[id].url or id + url = encode_alt(url) + local title = encode_alt(link_database[id].title) + if title then title = " title=\"" .. title .. "\"" else title = "" end + return add_escape("") .. text .. add_escape("") + end + + local function inline_link(text, link) + text = text:match("%b[]"):sub(2,-2) + local url, title = link:match("%(?[ \t]*['\"](.+)['\"]") + title = encode_alt(title) + url = url or link:match("%(?%)") or "" + url = encode_alt(url) + if title then + return add_escape("") .. text .. "" + else + return add_escape("") .. text .. add_escape("") + end + end + + text = text:gsub("(%b[])[ \t]*\n?[ \t]*(%b[])", reference_link) + text = text:gsub("(%b[])(%b())", inline_link) + return text +end + +-- Handle auto links, i.e. . +function auto_links(text) + local function link(s) + return add_escape("") .. s .. "" + end + -- Encode chars as a mix of dec and hex entitites to (perhaps) fool + -- spambots. + local function encode_email_address(s) + -- Use a deterministic encoding to make unit testing possible. + -- Code 45% hex, 45% dec, 10% plain. + local hex = {code = function(c) return "&#x" .. string.format("%x", c:byte()) .. ";" end, count = 1, rate = 0.45} + local dec = {code = function(c) return "&#" .. c:byte() .. ";" end, count = 0, rate = 0.45} + local plain = {code = function(c) return c end, count = 0, rate = 0.1} + local codes = {hex, dec, plain} + local function swap(t,k1,k2) local temp = t[k2] t[k2] = t[k1] t[k1] = temp end + + local out = "" + for i = 1,s:len() do + for _,code in ipairs(codes) do code.count = code.count + code.rate end + if codes[1].count < codes[2].count then swap(codes,1,2) end + if codes[2].count < codes[3].count then swap(codes,2,3) end + if codes[1].count < codes[2].count then swap(codes,1,2) end + + local code = codes[1] + local c = s:sub(i,i) + -- Force encoding of "@" to make email address more invisible. + if c == "@" and code == plain then code = codes[2] end + out = out .. code.code(c) + code.count = code.count - 1 + end + return out + end + local function mail(s) + s = unescape_special_chars(s) + local address = encode_email_address("mailto:" .. s) + local text = encode_email_address(s) + return add_escape("") .. text .. "" + end + -- links + text = text:gsub("<(https?:[^'\">%s]+)>", link) + text = text:gsub("<(ftp:[^'\">%s]+)>", link) + + -- mail + text = text:gsub("%s]+)>", mail) + text = text:gsub("<([-.%w]+%@[-.%w]+)>", mail) + return text +end + +-- Encode free standing amps (&) and angles (<)... note that this does not +-- encode free >. +function amps_and_angles(s) + -- encode amps not part of &..; expression + local pos = 1 + while true do + local amp = s:find("&", pos) + if not amp then break end + local semi = s:find(";", amp+1) + local stop = s:find("[ \t\n&]", amp+1) + if not semi or (stop and stop < semi) or (semi - amp) > 15 then + s = s:sub(1,amp-1) .. "&" .. s:sub(amp+1) + pos = amp+1 + else + pos = amp+1 + end + end + + -- encode naked <'s + s = s:gsub("<([^a-zA-Z/?$!])", "<%1") + s = s:gsub("<$", "<") + + -- what about >, nothing done in the original markdown source to handle them + return s +end + +-- Handles emphasis markers (* and _) in the text. +function emphasis(text) + for _, s in ipairs {"%*%*", "%_%_"} do + text = text:gsub(s .. "([^%s][%*%_]?)" .. s, "%1") + text = text:gsub(s .. "([^%s][^<>]-[^%s][%*%_]?)" .. s, "%1") + end + for _, s in ipairs {"%*", "%_"} do + text = text:gsub(s .. "([^%s_])" .. s, "%1") + text = text:gsub(s .. "([^%s_])" .. s, "%1") + text = text:gsub(s .. "([^%s_][^<>_]-[^%s_])" .. s, "%1") + text = text:gsub(s .. "([^<>_]-[^<>_]-[^<>_]-)" .. s, "%1") + end + return text +end + +-- Handles line break markers in the text. +function line_breaks(text) + return text:gsub(" +\n", "
    \n") +end + +-- Perform all span level transforms. +function span_transform(text) + text = code_spans(text) + text = escape_special_chars(text) + text = images(text) + text = anchors(text) + text = auto_links(text) + text = amps_and_angles(text) + text = emphasis(text) + text = line_breaks(text) + return text +end + +---------------------------------------------------------------------- +-- Markdown +---------------------------------------------------------------------- + +-- Cleanup the text by normalizing some possible variations to make further +-- processing easier. +function cleanup(text) + -- Standardize line endings + text = text:gsub("\r\n", "\n") -- DOS to UNIX + text = text:gsub("\r", "\n") -- Mac to UNIX + + -- Convert all tabs to spaces + text = detab(text) + + -- Strip lines with only spaces and tabs + while true do + local subs + text, subs = text:gsub("\n[ \t]+\n", "\n\n") + if subs == 0 then break end + end + + return "\n" .. text .. "\n" +end + +-- Strips link definitions from the text and stores the data in a lookup table. +function strip_link_definitions(text) + local linkdb = {} + + local function link_def(id, url, title) + id = id:match("%[(.+)%]"):lower() + linkdb[id] = linkdb[id] or {} + linkdb[id].url = url or linkdb[id].url + linkdb[id].title = title or linkdb[id].title + return "" + end + + local def_no_title = "\n ? ? ?(%b[]):[ \t]*\n?[ \t]*]+)>?[ \t]*" + local def_title1 = def_no_title .. "[ \t]+\n?[ \t]*[\"'(]([^\n]+)[\"')][ \t]*" + local def_title2 = def_no_title .. "[ \t]*\n[ \t]*[\"'(]([^\n]+)[\"')][ \t]*" + local def_title3 = def_no_title .. "[ \t]*\n?[ \t]+[\"'(]([^\n]+)[\"')][ \t]*" + + text = text:gsub(def_title1, link_def) + text = text:gsub(def_title2, link_def) + text = text:gsub(def_title3, link_def) + text = text:gsub(def_no_title, link_def) + return text, linkdb +end + +link_database = {} + +-- Main markdown processing function +function markdown(text) + init_hash(text) + init_escape_table() + + text = cleanup(text) + text = protect(text) + text, link_database = strip_link_definitions(text) + text = block_transform(text) + text = unescape_special_chars(text) + return text +end + +---------------------------------------------------------------------- +-- End of module +---------------------------------------------------------------------- + +setfenv(1, _G) +M.lock(M) + +-- Expose markdown function to the world +markdown = M.markdown + +-- Class for parsing command-line options +local OptionParser = {} +OptionParser.__index = OptionParser + +-- Creates a new option parser +function OptionParser:new() + local o = {short = {}, long = {}} + setmetatable(o, self) + return o +end + +-- Calls f() whenever a flag with specified short and long name is encountered +function OptionParser:flag(short, long, f) + local info = {type = "flag", f = f} + if short then self.short[short] = info end + if long then self.long[long] = info end +end + +-- Calls f(param) whenever a parameter flag with specified short and long name is encountered +function OptionParser:param(short, long, f) + local info = {type = "param", f = f} + if short then self.short[short] = info end + if long then self.long[long] = info end +end + +-- Calls f(v) for each non-flag argument +function OptionParser:arg(f) + self.arg = f +end + +-- Runs the option parser for the specified set of arguments. Returns true if all arguments +-- where successfully parsed and false otherwise. +function OptionParser:run(args) + local pos = 1 + while pos <= #args do + local arg = args[pos] + if arg == "--" then + for i=pos+1,#args do + if self.arg then self.arg(args[i]) end + return true + end + end + if arg:match("^%-%-") then + local info = self.long[arg:sub(3)] + if not info then print("Unknown flag: " .. arg) return false end + if info.type == "flag" then + info.f() + pos = pos + 1 + else + param = args[pos+1] + if not param then print("No parameter for flag: " .. arg) return false end + info.f(param) + pos = pos+2 + end + elseif arg:match("^%-") then + for i=2,arg:len() do + local c = arg:sub(i,i) + local info = self.short[c] + if not info then print("Unknown flag: -" .. c) return false end + if info.type == "flag" then + info.f() + else + if i == arg:len() then + param = args[pos+1] + if not param then print("No parameter for flag: -" .. c) return false end + info.f(param) + pos = pos + 1 + else + param = arg:sub(i+1) + info.f(param) + end + break + end + end + pos = pos + 1 + else + if self.arg then self.arg(arg) end + pos = pos + 1 + end + end + return true +end + +-- Handles the case when markdown is run from the command line +local function run_command_line(arg) + -- Generate output for input s given options + local function run(s, options) + s = markdown(s) + if not options.wrap_header then return s end + local header = "" + if options.header then + local f = io.open(options.header) or error("Could not open file: " .. options.header) + header = f:read("*a") + f:close() + else + header = [[ + + + + + TITLE + + + +]] + local title = options.title or s:match("

    (.-)

    ") or s:match("

    (.-)

    ") or + s:match("

    (.-)

    ") or "Untitled" + header = header:gsub("TITLE", title) + if options.inline_style then + local style = "" + local f = io.open(options.stylesheet) + if f then + style = f:read("*a") f:close() + else + error("Could not include style sheet " .. options.stylesheet .. ": File not found") + end + header = header:gsub('', + "") + else + header = header:gsub("STYLESHEET", options.stylesheet) + end + header = header:gsub("CHARSET", options.charset) + end + local footer = "" + if options.footer then + local f = io.open(options.footer) or error("Could not open file: " .. options.footer) + footer = f:read("*a") + f:close() + end + return header .. s .. footer + end + + -- Generate output path name from input path name given options. + local function outpath(path, options) + if options.append then return path .. ".html" end + local m = path:match("^(.+%.html)[^/\\]+$") if m then return m end + m = path:match("^(.+%.)[^/\\]*$") if m and path ~= m .. "html" then return m .. "html" end + return path .. ".html" + end + + -- Default commandline options + local options = { + wrap_header = true, + header = nil, + footer = nil, + charset = "utf-8", + title = nil, + stylesheet = "default.css", + inline_style = false + } + local help = [[ +Usage: markdown.lua [OPTION] [FILE] +Runs the markdown text markup to HTML converter on each file specified on the +command line. If no files are specified, runs on standard input. + +No header: + -n, --no-wrap Don't wrap the output in ... tags. +Custom header: + -e, --header FILE Use content of FILE for header. + -f, --footer FILE Use content of FILE for footer. +Generated header: + -c, --charset SET Specifies charset (default utf-8). + -i, --title TITLE Specifies title (default from first

    tag). + -s, --style STYLE Specifies style sheet file (default default.css). + -l, --inline-style Include the style sheet file inline in the header. +Generated files: + -a, --append Append .html extension (instead of replacing). +Other options: + -h, --help Print this help text. + -t, --test Run the unit tests. +]] + + local run_stdin = true + local op = OptionParser:new() + op:flag("n", "no-wrap", function () options.wrap_header = false end) + op:param("e", "header", function (x) options.header = x end) + op:param("f", "footer", function (x) options.footer = x end) + op:param("c", "charset", function (x) options.charset = x end) + op:param("i", "title", function(x) options.title = x end) + op:param("s", "style", function(x) options.stylesheet = x end) + op:flag("l", "inline-style", function(x) options.inline_style = true end) + op:flag("a", "append", function() options.append = true end) + op:flag("t", "test", function() + local n = arg[0]:gsub("markdown.lua", "markdown-tests.lua") + local f = io.open(n) + if f then + f:close() dofile(n) + else + error("Cannot find markdown-tests.lua") + end + run_stdin = false + end) + op:flag("h", "help", function() print(help) run_stdin = false end) + op:arg(function(path) + local file = io.open(path) or error("Could not open file: " .. path) + local s = file:read("*a") + file:close() + s = run(s, options) + file = io.open(outpath(path, options), "w") or error("Could not open output file: " .. outpath(path, options)) + file:write(s) + file:close() + run_stdin = false + end + ) + + if not op:run(arg) then + print(help) + run_stdin = false + end + + if run_stdin then + local s = io.read("*a") + s = run(s, options) + io.write(s) + end +end + +-- If we are being run from the command-line, act accordingly +if arg and arg[0]:find("markdown%.lua$") then + run_command_line(arg) +else + return markdown +end \ No newline at end of file