aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJonas Smedegaard <dr@jones.dk>2025-05-17 16:34:36 +0200
committerJonas Smedegaard <dr@jones.dk>2025-05-17 17:49:19 +0200
commit20a7517bf51a5ed1b36d0c134d1bf3f5d01b2e4a (patch)
treecaad8a508555fbce26baee69841375ca0209eb17
parentb4125c5a22700f86784b94fdb8587b7cef8d110b (diff)
add chapter on Markdown and annotation
-rw-r--r--_def_dia.qmd35
-rw-r--r--_markdown.qmd65
-rw-r--r--def.peg34
-rwxr-xr-xdef_AnnotatedWords.py19
-rwxr-xr-xdef_Block.py19
-rwxr-xr-xdef_HardBreak.py16
-rwxr-xr-xdef_Header.py17
-rwxr-xr-xdef_LinkDefinition.py31
-rwxr-xr-xdef_LinkedWords.py36
-rwxr-xr-xdef_List.py28
-rwxr-xr-xdef_Markdown.py13
-rwxr-xr-xdef_NEWLINE.py18
-rwxr-xr-xdef_PRINTABLES.py14
-rwxr-xr-xdef_Paragraph.py29
-rwxr-xr-xdef_PlainWords.py18
-rwxr-xr-xdef_SPACE.py14
-rwxr-xr-xdef_StyledWords.py30
-rwxr-xr-xdef_Uri.py21
-rwxr-xr-xdef_Words.py19
-rw-r--r--ref.bib24
-rw-r--r--report.qmd13
21 files changed, 513 insertions, 0 deletions
diff --git a/_def_dia.qmd b/_def_dia.qmd
new file mode 100644
index 0000000..767e253
--- /dev/null
+++ b/_def_dia.qmd
@@ -0,0 +1,35 @@
+Subsets of Markdown syntax structure
+expressed as syntax diagrams (a.k.a. railroad diagrams),
+involving the negative predicate from PEG notation.
+
+![Markdown](def_Markdown.svg)
+
+![Block](def_Block.svg)
+
+![Header](def_Header.svg)
+
+![List](def_List.svg)
+
+![LinkDefinition](def_LinkDefinition.svg)
+
+![Paragraph](def_Paragraph.svg)
+
+![Words](def_Words.svg)
+
+![StyledWords](def_StyledWords.svg)
+
+![LinkedWords](def_LinkedWords.svg)
+
+![AnnotatedWords](def_AnnotatedWords.svg)
+
+![PlainWords](def_PlainWords.svg)
+
+![Uri](def_Uri.svg)
+
+![HardBreak](def_HardBreak.svg)
+
+![PRINTABLES](def_PRINTABLES.svg)
+
+![SPACE](def_SPACE.svg)
+
+![NEWLINE](def_NEWLINE.svg)
diff --git a/_markdown.qmd b/_markdown.qmd
new file mode 100644
index 0000000..6966e99
--- /dev/null
+++ b/_markdown.qmd
@@ -0,0 +1,65 @@
+Markdown consists of blocks of content,
+optionally preceded by a set of Metadata blocks.
+Visually, this can be described using a syntax diagram
+where the possible order of elements is laid out
+like trains on rails,
+as seen in @fig-def-Markdown and @fig-def-Block.
+
+![Markdown](def_Markdown.svg){#fig-def-Markdown}
+
+![Block](def_Block.svg){#fig-def-Block}
+
+Reading order matters.
+These syntax diagrams should be read left-to-right and top-to-bottom,
+also at places with choice --
+e.g. the block type "Header" should be tried before "Paragraph",
+since (as elaborated below) a paragraph begins with any words,
+including the initial words definitive for other block types.
+In other words,
+these syntax diagrams do not reflect the more common EBNF grammars,
+but instead a parsing expression grammar [@Ford2004],
+because context-free grammars are unlikely to cover Markdown
+[@MacFarlane2014].
+The grammar is included as [Appendix @sec-def-peg].
+
+The most common content block is a paragraph,
+which consists of lines of space-delimited words
+followed by two or more line breaks.
+
+![Paragraph](def_Paragraph.svg){#fig-def-Paragraph}
+
+Words are sequences of printable characters
+(including punctuation and other symbols).
+They can be styled
+(@fig-def-StyledWords),
+have a hyperlink attached
+(@fig-def-LinkedWords)
+and have annotations attached
+(@fig-def-AnnotatedWords).
+
+![StyledWords](def_StyledWords.svg){#fig-def-StyledWords}
+
+![LinkedWords](def_LinkedWords.svg){#fig-def-LinkedWords}
+
+![AnnotatedWords](def_AnnotatedWords.svg){#fig-def-AnnotatedWords}
+
+![PlainWords](def_PlainWords.svg){#fig-def-PlainWords}
+
+Other content blocks include a header
+consisting of words
+(@fig-def-Header),
+and a list consisting of list items,
+each containing a block
+(@fig-def-List).
+
+![Header](def_Header.svg){#fig-def-Header}
+
+![List](def_List.svg){#fig-def-List}
+
+Yet other content blocks and inline types exist.
+Those are omitted in this description,
+which is limited to the components affected
+by extending the Markdown language with additional types of annotation.
+
+Syntax diagrams for additional Markdown components are included
+as [Appendix @sec-def-dia].
diff --git a/def.peg b/def.peg
new file mode 100644
index 0000000..163b5a4
--- /dev/null
+++ b/def.peg
@@ -0,0 +1,34 @@
+# Subsets of Markdown syntax structure
+# expressed in parsing expression grammar (PEG) notation.
+#
+# Copyright 2025, Jonas Smedegaard <dr@jones.dk>
+# SPDX-License-Identifier: GPL-3+
+
+# Document
+Markdown <- MetaBlock* Block*  # MetaBlock is not defined in this subset
+
+# Block elements
+Block <- Header / List / LinkDefinition / Paragraph  # ordered choice; Paragraph is tried last
+Header <- '#'+ SPACE* Words? SPACE* NEWLINE
+List <- ([-*]+ / [[:digit:]]+ [).]) (SPACE+ (List / Words))?
+ NEWLINE
+LinkDefinition <- '[' ![\[\]] PlainWords ']:' SPACE Uri NEWLINE
+ SPACE SPACE '"' !'"' PlainWords '"' NEWLINE
+Paragraph <- Words (SPACE? NEWLINE Words / HardBreak Words)*
+ NEWLINE NEWLINE+  # two or more line breaks end a paragraph
+
+# Inline elements
+Words <- StyledWords / LinkedWords / AnnotatedWords / PlainWords  # ordered choice; PlainWords is tried last
+StyledWords <- '*' Words '*' / '**' Words '**' / '_' Words '_'
+LinkedWords <- '[' Words ']' '(' Uri ')'
+ / '[' Words ']' '[' ![\[\]] LinkDefinitionKeyphrase ']'
+ / '[' LinkDefinitionKeyphrase ']'  # NOTE(review): LinkDefinitionKeyphrase is not defined in this subset
+AnnotatedWords <- '[' Words ']' '{' ![{}] PlainWords '}'
+PlainWords <- PRINTABLES (SPACE PRINTABLES)*
+Uri <- '<' ![<>] PRINTABLES? '>' / ![<>] PRINTABLES
+HardBreak <- SPACE SPACE NEWLINE
+
+# Terminals
+PRINTABLES <- [[:graph:]]+
+SPACE <- ' '
+NEWLINE <- '\r\n' / '\n' / '\r'  # CRLF is tried before the single-character forms
diff --git a/def_AnnotatedWords.py b/def_AnnotatedWords.py
new file mode 100755
index 0000000..842301b
--- /dev/null
+++ b/def_AnnotatedWords.py
@@ -0,0 +1,19 @@
+#!/usr/bin/python3
+
+# Copyright 2025, Jonas Smedegaard <dr@jones.dk>
+# SPDX-License-Identifier: GPL-3+
+
+from os.path import basename, splitext
+from railroad import *
+
+Diagram(  # AnnotatedWords: bracketed Words followed by a brace-enclosed annotation
+ Start("complex"),
+ "[",
+ NonTerminal("Words"),
+ "]",
+ "{",
+ NonTerminal('![{}] PlainWords'),  # label carries the PEG negative predicate verbatim
+ "}",
+ End("complex")
+).writeStandalone(
+ open(splitext(basename(__file__))[0] + ".svg", "w").write)  # SVG named after this script, in the current directory
diff --git a/def_Block.py b/def_Block.py
new file mode 100755
index 0000000..3dcd10f
--- /dev/null
+++ b/def_Block.py
@@ -0,0 +1,19 @@
+#!/usr/bin/python3
+
+# Copyright 2025, Jonas Smedegaard <dr@jones.dk>
+# SPDX-License-Identifier: GPL-3+
+
+from os.path import basename, splitext
+from railroad import *
+
+Diagram(  # Block: choice between the four block types, listed in the same order as def.peg
+ Start("complex"),
+ Choice(0,
+ NonTerminal("Header"),
+ NonTerminal("List"),
+ NonTerminal("LinkDefinition"),
+ NonTerminal("Paragraph")
+ ),
+ End("complex")
+).writeStandalone(
+ open(splitext(basename(__file__))[0] + ".svg", "w").write)  # SVG named after this script, in the current directory
diff --git a/def_HardBreak.py b/def_HardBreak.py
new file mode 100755
index 0000000..386487d
--- /dev/null
+++ b/def_HardBreak.py
@@ -0,0 +1,16 @@
+#!/usr/bin/python3
+
+# Copyright 2025, Jonas Smedegaard <dr@jones.dk>
+# SPDX-License-Identifier: GPL-3+
+
+from os.path import basename, splitext
+from railroad import *
+
+Diagram(  # HardBreak: two SPACE items immediately followed by a NEWLINE
+ Start("complex"),
+ NonTerminal("SPACE"),
+ NonTerminal("SPACE"),
+ NonTerminal("NEWLINE"),
+ End("complex")
+).writeStandalone(
+ open(splitext(basename(__file__))[0] + ".svg", "w").write)  # SVG named after this script, in the current directory
diff --git a/def_Header.py b/def_Header.py
new file mode 100755
index 0000000..2559443
--- /dev/null
+++ b/def_Header.py
@@ -0,0 +1,17 @@
+#!/usr/bin/python3
+
+# Copyright 2025, Jonas Smedegaard <dr@jones.dk>
+# SPDX-License-Identifier: GPL-3+
+
+from os.path import basename, splitext
+from railroad import *
+
+Diagram(
+ Start("complex"),
+ OneOrMore('#'),
+ ZeroOrMore(NonTerminal("SPACE")),
+ Optional(NonTerminal("Words")),
+ NonTerminal("NEWLINE"),
+ End("complex")
+).writeStandalone(
+ open(splitext(basename(__file__))[0] + ".svg", "w").write)
diff --git a/def_LinkDefinition.py b/def_LinkDefinition.py
new file mode 100755
index 0000000..b2b5b15
--- /dev/null
+++ b/def_LinkDefinition.py
@@ -0,0 +1,31 @@
+#!/usr/bin/python3
+
+# Copyright 2025, Jonas Smedegaard <dr@jones.dk>
+# SPDX-License-Identifier: GPL-3+
+
+from os.path import basename, splitext
+from railroad import *
+
+Diagram(
+ Start("complex"),
+ Stack(
+ Sequence(
+ "[",
+ NonTerminal("![\[\]] PlainWords"),
+ "]",
+ NonTerminal("SPACE"),
+ NonTerminal("Uri"),
+ NonTerminal("NEWLINE")
+ ),
+ Sequence(
+ NonTerminal("SPACE"),
+ NonTerminal("SPACE"),
+ '"',
+ NonTerminal("!'\"' PlainWords"),
+ '"',
+ NonTerminal("NEWLINE")
+ )
+ ),
+ End("complex")
+).writeStandalone(
+ open(splitext(basename(__file__))[0] + ".svg", "w").write)
diff --git a/def_LinkedWords.py b/def_LinkedWords.py
new file mode 100755
index 0000000..1185b8a
--- /dev/null
+++ b/def_LinkedWords.py
@@ -0,0 +1,36 @@
+#!/usr/bin/python3
+
+# Copyright 2025, Jonas Smedegaard <dr@jones.dk>
+# SPDX-License-Identifier: GPL-3+
+
+from os.path import basename, splitext
+from railroad import *
+
+Diagram(  # LinkedWords: inline link, reference link, or shortcut reference link
+ Start("complex"),
+ Choice(0,
+ Sequence(  # [Words](Uri)
+ "[",
+ NonTerminal("Words"),
+ "]",
+ "(",
+ NonTerminal("Uri"),
+ ")"
+ ),
+ Sequence(  # [Words][key]
+ "[",
+ NonTerminal("Words"),
+ "]",
+ "[",
+ NonTerminal("LinkDefinitionKey"),  # NOTE(review): def.peg has '![\[\]] LinkDefinitionKeyphrase' here — confirm the intended label
+ "]"
+ ),
+ Sequence(  # [key]
+ "[",
+ NonTerminal("LinkDefinitionKey"),  # NOTE(review): def.peg names this LinkDefinitionKeyphrase — confirm
+ "]",
+ )
+ ),
+ End("complex")
+).writeStandalone(
+ open(splitext(basename(__file__))[0] + ".svg", "w").write)  # SVG named after this script, in the current directory
diff --git a/def_List.py b/def_List.py
new file mode 100755
index 0000000..f5e8058
--- /dev/null
+++ b/def_List.py
@@ -0,0 +1,28 @@
+#!/usr/bin/python3
+
+# Copyright 2025, Jonas Smedegaard <dr@jones.dk>
+# SPDX-License-Identifier: GPL-3+
+
+from os.path import basename, splitext
+from railroad import *
+
+Diagram(
+ Start("complex"),
+ Choice(0,
+ "[-*]",
+ Sequence(
+ "[[:digit:]]+",
+ "[).]",
+ )
+ ),
+ Optional(
+ OneOrMore(NonTerminal("SPACE")),
+ Choice(0,
+ NonTerminal("List"),
+ NonTerminal("Words")
+ )
+ ),
+ NonTerminal("NEWLINE"),
+ End("complex")
+).writeStandalone(
+ open(splitext(basename(__file__))[0] + ".svg", "w").write)
diff --git a/def_Markdown.py b/def_Markdown.py
new file mode 100755
index 0000000..ce66425
--- /dev/null
+++ b/def_Markdown.py
@@ -0,0 +1,13 @@
+#!/usr/bin/python3
+
+# Copyright 2025, Jonas Smedegaard <dr@jones.dk>
+# SPDX-License-Identifier: GPL-3+
+
+from os.path import basename, splitext
+from railroad import *
+
+Diagram(  # Markdown (document rule): any number of MetaBlock items, then any number of Block items
+ ZeroOrMore(NonTerminal("MetaBlock")),  # no explicit Start/End items are passed in this script
+ ZeroOrMore(NonTerminal("Block"))
+).writeStandalone(
+ open(splitext(basename(__file__))[0] + ".svg", "w").write)  # SVG named after this script, in the current directory
diff --git a/def_NEWLINE.py b/def_NEWLINE.py
new file mode 100755
index 0000000..5adbca6
--- /dev/null
+++ b/def_NEWLINE.py
@@ -0,0 +1,18 @@
+#!/usr/bin/python3
+
+# Copyright 2025, Jonas Smedegaard <dr@jones.dk>
+# SPDX-License-Identifier: GPL-3+
+
+from os.path import basename, splitext
+from railroad import *
+
+Diagram(  # NEWLINE: CRLF, LF or CR, listed in the same order as def.peg
+ Start("complex"),
+ Choice(0,
+ '\\r\\n',  # doubled backslashes: the label shows the escape sequence literally
+ '\\n',
+ '\\r',
+ ),
+ End("complex")
+).writeStandalone(
+ open(splitext(basename(__file__))[0] + ".svg", "w").write)  # SVG named after this script, in the current directory
diff --git a/def_PRINTABLES.py b/def_PRINTABLES.py
new file mode 100755
index 0000000..d5aed28
--- /dev/null
+++ b/def_PRINTABLES.py
@@ -0,0 +1,14 @@
+#!/usr/bin/python3
+
+# Copyright 2025, Jonas Smedegaard <dr@jones.dk>
+# SPDX-License-Identifier: GPL-3+
+
+from os.path import basename, splitext
+from railroad import *
+
+Diagram(
+ Start("complex"),
+ OneOrMore("[[:graph:]]+"),
+ End("complex")
+).writeStandalone(
+ open(splitext(basename(__file__))[0] + ".svg", "w").write)
diff --git a/def_Paragraph.py b/def_Paragraph.py
new file mode 100755
index 0000000..3513315
--- /dev/null
+++ b/def_Paragraph.py
@@ -0,0 +1,29 @@
+#!/usr/bin/python3
+
+# Copyright 2025, Jonas Smedegaard <dr@jones.dk>
+# SPDX-License-Identifier: GPL-3+
+
+from os.path import basename, splitext
+from railroad import *
+
+Diagram(  # Paragraph: Words, any number of continuation lines, then two or more line breaks
+ Start("complex"),
+ NonTerminal("Words"),
+ ZeroOrMore(
+ Choice(0,
+ Sequence(  # soft line break: optional single space, line break, more words
+ Optional(NonTerminal("SPACE")),
+ NonTerminal("NEWLINE"),
+ NonTerminal("Words")
+ ),
+ Sequence(  # hard line break followed by more words
+ NonTerminal("HardBreak"),
+ NonTerminal("Words")
+ )
+ )
+ ),
+ NonTerminal("NEWLINE"),
+ OneOrMore(NonTerminal("NEWLINE")),  # with the line above: at least two line breaks in total
+ End("complex")
+).writeStandalone(
+ open(splitext(basename(__file__))[0] + ".svg", "w").write)  # SVG named after this script, in the current directory
diff --git a/def_PlainWords.py b/def_PlainWords.py
new file mode 100755
index 0000000..42764bc
--- /dev/null
+++ b/def_PlainWords.py
@@ -0,0 +1,18 @@
+#!/usr/bin/python3
+
+# Copyright 2025, Jonas Smedegaard <dr@jones.dk>
+# SPDX-License-Identifier: GPL-3+
+
+from os.path import basename, splitext
+from railroad import *
+
+Diagram(
+ Start("complex"),
+ NonTerminal("PRINTABLES"),
+ ZeroOrMore(
+ NonTerminal("SPACE"),
+ NonTerminal("PRINTABLES")
+ ),
+ End("complex")
+).writeStandalone(
+ open(splitext(basename(__file__))[0] + ".svg", "w").write)
diff --git a/def_SPACE.py b/def_SPACE.py
new file mode 100755
index 0000000..9a6353b
--- /dev/null
+++ b/def_SPACE.py
@@ -0,0 +1,14 @@
+#!/usr/bin/python3
+
+# Copyright 2025, Jonas Smedegaard <dr@jones.dk>
+# SPDX-License-Identifier: GPL-3+
+
+from os.path import basename, splitext
+from railroad import *
+
+Diagram(  # SPACE: a single space character
+ Start("complex"),
+ "' '",  # label is the quoted space exactly as written in def.peg
+ End("complex")
+).writeStandalone(
+ open(splitext(basename(__file__))[0] + ".svg", "w").write)  # SVG named after this script, in the current directory
diff --git a/def_StyledWords.py b/def_StyledWords.py
new file mode 100755
index 0000000..c87ae6a
--- /dev/null
+++ b/def_StyledWords.py
@@ -0,0 +1,30 @@
+#!/usr/bin/python3
+
+# Copyright 2025, Jonas Smedegaard <dr@jones.dk>
+# SPDX-License-Identifier: GPL-3+
+
+from os.path import basename, splitext
+from railroad import *
+
+Diagram(  # StyledWords: Words wrapped in matching '*', '**' or '_' delimiters
+ Start("complex"),
+ Choice(0,  # alternatives are listed in the same order as def.peg
+ Sequence(
+ "*",
+ NonTerminal("Words"),
+ "*"
+ ),
+ Sequence(
+ "**",
+ NonTerminal("Words"),
+ "**"
+ ),
+ Sequence(
+ "_",
+ NonTerminal("Words"),
+ "_"
+ )
+ ),
+ End("complex")
+).writeStandalone(
+ open(splitext(basename(__file__))[0] + ".svg", "w").write)  # SVG named after this script, in the current directory
diff --git a/def_Uri.py b/def_Uri.py
new file mode 100755
index 0000000..79ef58a
--- /dev/null
+++ b/def_Uri.py
@@ -0,0 +1,21 @@
+#!/usr/bin/python3
+
+# Copyright 2025, Jonas Smedegaard <dr@jones.dk>
+# SPDX-License-Identifier: GPL-3+
+
+from os.path import basename, splitext
+from railroad import *
+
+Diagram(  # Uri: printable characters, either wrapped in angle brackets or bare
+ Start("complex"),
+ Choice(0,
+ Sequence(
+ "<",
+ NonTerminal("![<>] PRINTABLES"),  # NOTE(review): def.peg makes PRINTABLES optional ('?') inside '<' '>' — confirm
+ ">",
+ ),
+ NonTerminal("![<>] PRINTABLES"),  # label carries the PEG negative predicate verbatim
+ ),
+ End("complex")
+).writeStandalone(
+ open(splitext(basename(__file__))[0] + ".svg", "w").write)  # SVG named after this script, in the current directory
diff --git a/def_Words.py b/def_Words.py
new file mode 100755
index 0000000..cfc07dc
--- /dev/null
+++ b/def_Words.py
@@ -0,0 +1,19 @@
+#!/usr/bin/python3
+
+# Copyright 2025, Jonas Smedegaard <dr@jones.dk>
+# SPDX-License-Identifier: GPL-3+
+
+from os.path import basename, splitext
+from railroad import *
+
+Diagram(  # Words: choice between the four inline types, listed in the same order as def.peg
+ Start("complex"),
+ Choice(0,
+ NonTerminal("StyledWords"),
+ NonTerminal("LinkedWords"),
+ NonTerminal("AnnotatedWords"),
+ NonTerminal("PlainWords")
+ ),
+ End("complex")
+).writeStandalone(
+ open(splitext(basename(__file__))[0] + ".svg", "w").write)  # SVG named after this script, in the current directory
diff --git a/ref.bib b/ref.bib
index 18fda32..fe410b6 100644
--- a/ref.bib
+++ b/ref.bib
@@ -275,6 +275,30 @@
file = {:Adobe2012 - Extensible Metadata Platform (XMP) Specification.pdf:PDF},
}
+@Article{Ford2004,
+ author = {Bryan Ford},
+ date = {2004-01},
+ journaltitle = {ACM SIGPLAN Notices},
+ title = {Parsing expression grammars},
+ doi = {10.1145/982962.964011},
+ issn = {1558-1160},
+ number = {1},
+ pages = {111--122},
+ subtitle = {a recognition-based syntactic foundation},
+ volume = {39},
+ publisher = {Association for Computing Machinery (ACM)},
+}
+
+@Online{MacFarlane2014,
+ author = {John {MacFarlane}},
+ date = {2014-10-25},
+ title = {If anyone wants to contribute a BNF, please do! But I'm very skeptical that it can be done, due to},
+ url = {https://github.com/commonmark/commonmark-spec/issues/113#issuecomment-60467783},
+ organization = {GitHub, Inc.},
+ titleaddon = {[Comment on code issue "Add BNF specification"]},
+ urldate = {2025-05-17},
+}
+
@Comment{jabref-meta: databaseType:biblatex;}
@Comment{jabref-meta: fileDirectory-jonas-bastian:/home/jonas/Projects/RUC/LIB/md;}
diff --git a/report.qmd b/report.qmd
index 00e9959..42818d6 100644
--- a/report.qmd
+++ b/report.qmd
@@ -67,6 +67,10 @@ are editorial notes not intented for inclusion in the final delivery.*
{{< include _intro.qmd >}}
+# Markdown and annotation
+
+{{< include _markdown.qmd >}}
+
# Analysis of existing framework
{{< include _background.qmd >}}
@@ -105,3 +109,12 @@ are editorial notes not intented for inclusion in the final delivery.*
```{.lua include="_extensions/ruc-play/semantic-markdown/semantic-markdown.lua" code-line-numbers="true"}
```
+
+# Markdown syntax as PEG {.appendix #sec-def-peg}
+
+```{.peg include="def.peg" code-line-numbers="true"}
+```
+
+# Markdown syntax as syntax diagrams {.appendix #sec-def-dia}
+
+{{< include _def_dia.qmd >}}