add text parsing and rendering.

soasme · Oct 13, 2018 · 6f5ce59 · 6f5ce59
1 parent 5876d3a
commit 6f5ce59
Show file tree

Hide file tree

Showing 3 changed files with 38 additions and 20 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1 +1,3 @@
 nimcache/
+markdown
+tests/test1
diff --git a/src/markdownpkg/submodule.nim b/src/markdownpkg/submodule.nim
@@ -6,14 +6,17 @@
 import re, strutils, strformat, tables, sequtils, math
 
 type
+    MarkdownError* = object of Exception
+
     # Type for header element
     Header* = object
         doc: string
         level: int
 
     # Signify the token type
     MarkdownTokenType* {.pure.} = enum
-        Header
+        Header,
+        Text
 
     # Hold two values: type: MarkdownTokenType, and xyzValue.
     # xyz is the particular type name.
@@ -23,9 +26,11 @@ type
         len: int
         case type*: MarkdownTokenType
         of MarkdownTokenType.Header: headerVal*: Header
+        of MarkdownTokenType.Text: textVal*: string
 
 var blockRules = @{
-    MarkdownTokenType.Header: re"^ *(#{1,6}) *([^\n]+?) *#* *(?:\n+|$)"
+    MarkdownTokenType.Header: re"^ *(#{1,6}) *([^\n]+?) *#* *(?:\n+|$)",
+    MarkdownTokenType.Text: re"^([^\n]+)",
 }.newTable
 
 # Replace `<` and `>` to HTML-safe characters.
@@ -77,19 +82,22 @@ proc findToken(doc: string, start: int, ruleType: MarkdownTokenType, regex: Rege
         var val: Header
         val.level = matches[0].len
         val.doc = matches[1]
-        result = MarkdownTokenRef(pos: start, len: size, type: MarkdownTokenType.Header, headerVal: val)  
+        result = MarkdownTokenRef(pos: start, len: size, type: MarkdownTokenType.Header, headerVal: val) 
+    of MarkdownTokenType.Text:
+        result = MarkdownTokenRef(pos: start, len: size, type: MarkdownTokenType.Text, textVal: matches[0]) 
 
 # Parse markdown document into a sequence of tokens.
 iterator parseTokens(doc: string): MarkdownTokenRef =
     var n = 0
-    while n < doc.len:
-        for ruleType, ruleRegex in blockRules:
-            let token = findToken(doc, n, ruleType, ruleRegex)
-            if token != nil:
-                n += token.len
-                yield token
-                break
-
+    block parseBlock:
+        while n < doc.len:
+            for ruleType, ruleRegex in blockRules:
+                let token = findToken(doc, n, ruleType, ruleRegex)
+                if token != nil:
+                    n += token.len
+                    yield token
+                    break parseBlock
+            raise newException(MarkdownError, fmt"unknown block rule at position {n}.")
 
 # Render header tag, for example, `<h1>`, `<h2>`, etc.
 # Example:
@@ -98,10 +106,15 @@ iterator parseTokens(doc: string): MarkdownTokenRef =
 proc renderHeader*(header: Header): string =
     result = fmt"<h{header.level}>{header.doc}</h{header.level}>"
 
+proc renderText*(text: string): string =
+    result = text.escapeAmpersandSeq.escapeTag
+
 proc renderToken(token: MarkdownTokenRef): string =
     case token.type
     of MarkdownTokenType.Header:
         result = renderHeader(token.headerVal)
+    of MarkdownTokenType.Text:
+        result = renderText(token.textVal)
 
 # Turn markdown-formatted string into HTML-formatting string.
 # By setting `escapse` to false, no HTML tag will be escaped.

diff --git a/tests/test1.nim b/tests/test1.nim
@@ -22,12 +22,15 @@ test "escape & sequence":
   check escapeAmpersandSeq("hello & world") == "hello &amp; world"
   check escapeAmpersandSeq("hello &amp; world") == "hello &amp; world"
 
-  test "test headers":
-    check markdown("#h1") == "<h1>h1</h1>"
-    check markdown("# h1") == "<h1>h1</h1>"
-    check markdown(" #h1") == "<h1>h1</h1>"
-    check markdown("## h2") == "<h2>h2</h2>"
-    check markdown("### h3") == "<h3>h3</h3>"
-    check markdown("#### h4") == "<h4>h4</h4>"
-    check markdown("##### h5") == "<h5>h5</h5>"
-    check markdown("###### h6") == "<h6>h6</h6>"
+test "headers":
+  check markdown("#h1") == "<h1>h1</h1>"
+  check markdown("# h1") == "<h1>h1</h1>"
+  check markdown(" #h1") == "<h1>h1</h1>"
+  check markdown("## h2") == "<h2>h2</h2>"
+  check markdown("### h3") == "<h3>h3</h3>"
+  check markdown("#### h4") == "<h4>h4</h4>"
+  check markdown("##### h5") == "<h5>h5</h5>"
+  check markdown("###### h6") == "<h6>h6</h6>"
+
+test "text":
+  check markdown("hello world") == "hello world"