diff options
| author | Jonas Smedegaard <dr@jones.dk> | 2025-05-17 16:34:36 +0200 |
|---|---|---|
| committer | Jonas Smedegaard <dr@jones.dk> | 2025-05-17 17:49:19 +0200 |
| commit | 20a7517bf51a5ed1b36d0c134d1bf3f5d01b2e4a (patch) | |
| tree | caad8a508555fbce26baee69841375ca0209eb17 | |
| parent | b4125c5a22700f86784b94fdb8587b7cef8d110b (diff) | |
add chapter on Markdown and annotation
| -rw-r--r-- | _def_dia.qmd | 35 | ||||
| -rw-r--r-- | _markdown.qmd | 65 | ||||
| -rw-r--r-- | def.peg | 34 | ||||
| -rwxr-xr-x | def_AnnotatedWords.py | 19 | ||||
| -rwxr-xr-x | def_Block.py | 19 | ||||
| -rwxr-xr-x | def_HardBreak.py | 16 | ||||
| -rwxr-xr-x | def_Header.py | 17 | ||||
| -rwxr-xr-x | def_LinkDefinition.py | 31 | ||||
| -rwxr-xr-x | def_LinkedWords.py | 36 | ||||
| -rwxr-xr-x | def_List.py | 28 | ||||
| -rwxr-xr-x | def_Markdown.py | 13 | ||||
| -rwxr-xr-x | def_NEWLINE.py | 18 | ||||
| -rwxr-xr-x | def_PRINTABLES.py | 14 | ||||
| -rwxr-xr-x | def_Paragraph.py | 29 | ||||
| -rwxr-xr-x | def_PlainWords.py | 18 | ||||
| -rwxr-xr-x | def_SPACE.py | 14 | ||||
| -rwxr-xr-x | def_StyledWords.py | 30 | ||||
| -rwxr-xr-x | def_Uri.py | 21 | ||||
| -rwxr-xr-x | def_Words.py | 19 | ||||
| -rw-r--r-- | ref.bib | 24 | ||||
| -rw-r--r-- | report.qmd | 13 |
21 files changed, 513 insertions, 0 deletions
diff --git a/_def_dia.qmd b/_def_dia.qmd new file mode 100644 index 0000000..767e253 --- /dev/null +++ b/_def_dia.qmd @@ -0,0 +1,35 @@ +Subsets of Markdown syntax structure +expressed as syntax diagrams (a.k.a. railroad diagrams), +involving the negative predicate from PEG notation. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/_markdown.qmd b/_markdown.qmd new file mode 100644 index 0000000..6966e99 --- /dev/null +++ b/_markdown.qmd @@ -0,0 +1,65 @@ +Markdown consists of blocks of content, +optionally preceded by a set of Metadata blocks. +Visually, this can be described using a syntax diagram +where the possible order of elements is laid out +like trains on rails, +as seen in @fig-def-Markdown and @fig-def-Block. + +{#fig-def-Markdown} + +{#fig-def-Block} + +Reading order matters. +These syntax diagrams should be read left-to-right and top-to-bottom, +also at places with choice -- +e.g. the block type "Header" should be tried before "Paragraph", +since (as elaborated below) a paragraph begins with any words, +including the initial words definitive for other block types. +In other words, +these syntax diagrams do not reflect the more common EBNF grammars, +but instead a parsing expression grammar [@Ford2004], +because context-free grammars are unlikely to cover Markdown +[@MacFarlane2014]. +The grammar is included as [Appendix @sec-def-peg]. + +The most common content block is a paragraph, +which consists of lines of space-delimited words +followed by two or more line breaks. + +{#fig-def-Paragraph} + +Words are sets of printable characters +(including punctuation and other printable characters). +They can be styled +(@fig-def-StyledWords), +have a hyperlink attached +(@fig-def-LinkedWords) +and have annotations attached +(@fig-def-AnnotatedWords). 
+ +{#fig-def-StyledWords} + +{#fig-def-LinkedWords} + +{#fig-def-AnnotatedWords} + +{#fig-def-PlainWords} + +Other content blocks include a header +consisting of words +(@fig-def-Header), +and a list consisting of list items, +each containing a block +(@fig-def-List). + +{#fig-def-Header} + +{#fig-def-List} + +Yet other content blocks and inline types exist. +Those are omitted in this description, +which is limited to the components affected +by extending the Markdown language with additional types of annotation. + +Syntax diagrams for additional Markdown components are included +as [Appendix @sec-def-dia]. @@ -0,0 +1,34 @@ +# Subsets of Markdown syntax structure +# expressed in parsing expression grammar (PEG) notation. +# +# Copyright 2025, Jonas Smedegaard <dr@jones.dk> +# SPDX-License-Identifier: GPL-3+ + +# Document +Markdown <- MetaBlock* Block* + +# Block elements +Block <- Header / List / LinkDefinition / Paragraph +Header <- '#'+ SPACE* Words? SPACE* NEWLINE +List <- ([-*]+ / [[:digit:]]+ [).]) (SPACE+ (List / Words))? + NEWLINE +LinkDefinition <- '[' !'[]' PlainWords ']:' SPACE Uri NEWLINE + SPACE SPACE '"' !'"' PlainWords '"' NEWLINE +Paragraph <- Words (SPACE? NEWLINE Words / HardBreak Words)* + NEWLINE NEWLINE + +# Inline elements +Words <- StyledWords / LinkedWords / AnnotatedWords / PlainWords +StyledWords <- '*' Words '*' / '**' Words '**' / '_' Words '_' +LinkedWords <- '[' Words ']' '(' Uri ')' + / '[' Words ']' '[' ![\[\]] LinkDefinitionKeyphrase ']' + / '[' LinkDefinitionKeyphrase ']' +AnnotatedWords <- '[' Words ']' '{' ![{}] PlainWords '}' +PlainWords <- PRINTABLES (SPACE PRINTABLES)* +Uri <- '<' ![<>] PRINTABLES? 
'>' / ![<>] PRINTABLES +HardBreak <- SPACE SPACE NEWLINE + +# Terminals +PRINTABLES <- [[:graph:]]+ +SPACE <- ' ' +NEWLINE <- '\r\n' / '\n' / '\r' diff --git a/def_AnnotatedWords.py b/def_AnnotatedWords.py new file mode 100755 index 0000000..842301b --- /dev/null +++ b/def_AnnotatedWords.py @@ -0,0 +1,19 @@ +#!/usr/bin/python3 + +# Copyright 2025, Jonas Smedegaard <dr@jones.dk> +# SPDX-License-Identifier: GPL-3+ + +from os.path import basename, splitext +from railroad import * + +Diagram( + Start("complex"), + "[", + NonTerminal("Words"), + "]", + "{", + NonTerminal('![{}] PlainWords'), + "}", + End("complex") +).writeStandalone( + open(splitext(basename(__file__))[0] + ".svg", "w").write) diff --git a/def_Block.py b/def_Block.py new file mode 100755 index 0000000..3dcd10f --- /dev/null +++ b/def_Block.py @@ -0,0 +1,19 @@ +#!/usr/bin/python3 + +# Copyright 2025, Jonas Smedegaard <dr@jones.dk> +# SPDX-License-Identifier: GPL-3+ + +from os.path import basename, splitext +from railroad import * + +Diagram( + Start("complex"), + Choice(0, + NonTerminal("Header"), + NonTerminal("List"), + NonTerminal("LinkDefinition"), + NonTerminal("Paragraph") + ), + End("complex") +).writeStandalone( + open(splitext(basename(__file__))[0] + ".svg", "w").write) diff --git a/def_HardBreak.py b/def_HardBreak.py new file mode 100755 index 0000000..386487d --- /dev/null +++ b/def_HardBreak.py @@ -0,0 +1,16 @@ +#!/usr/bin/python3 + +# Copyright 2025, Jonas Smedegaard <dr@jones.dk> +# SPDX-License-Identifier: GPL-3+ + +from os.path import basename, splitext +from railroad import * + +Diagram( + Start("complex"), + NonTerminal("SPACE"), + NonTerminal("SPACE"), + NonTerminal("NEWLINE"), + End("complex") +).writeStandalone( + open(splitext(basename(__file__))[0] + ".svg", "w").write) diff --git a/def_Header.py b/def_Header.py new file mode 100755 index 0000000..2559443 --- /dev/null +++ b/def_Header.py @@ -0,0 +1,17 @@ +#!/usr/bin/python3 + +# Copyright 2025, Jonas Smedegaard <dr@jones.dk> +# 
SPDX-License-Identifier: GPL-3+ + +from os.path import basename, splitext +from railroad import * + +Diagram( + Start("complex"), + OneOrMore('#'), + ZeroOrMore(NonTerminal("SPACE")), + Optional(NonTerminal("Words")), + NonTerminal("NEWLINE"), + End("complex") +).writeStandalone( + open(splitext(basename(__file__))[0] + ".svg", "w").write) diff --git a/def_LinkDefinition.py b/def_LinkDefinition.py new file mode 100755 index 0000000..b2b5b15 --- /dev/null +++ b/def_LinkDefinition.py @@ -0,0 +1,31 @@ +#!/usr/bin/python3 + +# Copyright 2025, Jonas Smedegaard <dr@jones.dk> +# SPDX-License-Identifier: GPL-3+ + +from os.path import basename, splitext +from railroad import * + +Diagram( + Start("complex"), + Stack( + Sequence( + "[", + NonTerminal("![\[\]] PlainWords"), + "]", + NonTerminal("SPACE"), + NonTerminal("Uri"), + NonTerminal("NEWLINE") + ), + Sequence( + NonTerminal("SPACE"), + NonTerminal("SPACE"), + '"', + NonTerminal("!'\"' PlainWords"), + '"', + NonTerminal("NEWLINE") + ) + ), + End("complex") +).writeStandalone( + open(splitext(basename(__file__))[0] + ".svg", "w").write) diff --git a/def_LinkedWords.py b/def_LinkedWords.py new file mode 100755 index 0000000..1185b8a --- /dev/null +++ b/def_LinkedWords.py @@ -0,0 +1,36 @@ +#!/usr/bin/python3 + +# Copyright 2025, Jonas Smedegaard <dr@jones.dk> +# SPDX-License-Identifier: GPL-3+ + +from os.path import basename, splitext +from railroad import * + +Diagram( + Start("complex"), + Choice(0, + Sequence( + "[", + NonTerminal("Words"), + "]", + "(", + NonTerminal("Uri"), + ")" + ), + Sequence( + "[", + NonTerminal("Words"), + "]", + "[", + NonTerminal("LinkDefinitionKey"), + "]" + ), + Sequence( + "[", + NonTerminal("LinkDefinitionKey"), + "]", + ) + ), + End("complex") +).writeStandalone( + open(splitext(basename(__file__))[0] + ".svg", "w").write) diff --git a/def_List.py b/def_List.py new file mode 100755 index 0000000..f5e8058 --- /dev/null +++ b/def_List.py @@ -0,0 +1,28 @@ +#!/usr/bin/python3 + +# Copyright 
2025, Jonas Smedegaard <dr@jones.dk> +# SPDX-License-Identifier: GPL-3+ + +from os.path import basename, splitext +from railroad import * + +Diagram( + Start("complex"), + Choice(0, + "[-*]", + Sequence( + "[[:digit:]]+", + "[).]", + ) + ), + Optional( + OneOrMore(NonTerminal("SPACE")), + Choice(0, + NonTerminal("List"), + NonTerminal("Words") + ) + ), + NonTerminal("NEWLINE"), + End("complex") +).writeStandalone( + open(splitext(basename(__file__))[0] + ".svg", "w").write) diff --git a/def_Markdown.py b/def_Markdown.py new file mode 100755 index 0000000..ce66425 --- /dev/null +++ b/def_Markdown.py @@ -0,0 +1,13 @@ +#!/usr/bin/python3 + +# Copyright 2025, Jonas Smedegaard <dr@jones.dk> +# SPDX-License-Identifier: GPL-3+ + +from os.path import basename, splitext +from railroad import * + +Diagram( + ZeroOrMore(NonTerminal("MetaBlock")), + ZeroOrMore(NonTerminal("Block")) +).writeStandalone( + open(splitext(basename(__file__))[0] + ".svg", "w").write) diff --git a/def_NEWLINE.py b/def_NEWLINE.py new file mode 100755 index 0000000..5adbca6 --- /dev/null +++ b/def_NEWLINE.py @@ -0,0 +1,18 @@ +#!/usr/bin/python3 + +# Copyright 2025, Jonas Smedegaard <dr@jones.dk> +# SPDX-License-Identifier: GPL-3+ + +from os.path import basename, splitext +from railroad import * + +Diagram( + Start("complex"), + Choice(0, + '\\r\\n', + '\\n', + '\\r', + ), + End("complex") +).writeStandalone( + open(splitext(basename(__file__))[0] + ".svg", "w").write) diff --git a/def_PRINTABLES.py b/def_PRINTABLES.py new file mode 100755 index 0000000..d5aed28 --- /dev/null +++ b/def_PRINTABLES.py @@ -0,0 +1,14 @@ +#!/usr/bin/python3 + +# Copyright 2025, Jonas Smedegaard <dr@jones.dk> +# SPDX-License-Identifier: GPL-3+ + +from os.path import basename, splitext +from railroad import * + +Diagram( + Start("complex"), + OneOrMore("[[:graph:]]+"), + End("complex") +).writeStandalone( + open(splitext(basename(__file__))[0] + ".svg", "w").write) diff --git a/def_Paragraph.py b/def_Paragraph.py new file mode 
100755 index 0000000..3513315 --- /dev/null +++ b/def_Paragraph.py @@ -0,0 +1,29 @@ +#!/usr/bin/python3 + +# Copyright 2025, Jonas Smedegaard <dr@jones.dk> +# SPDX-License-Identifier: GPL-3+ + +from os.path import basename, splitext +from railroad import * + +Diagram( + Start("complex"), + NonTerminal("Words"), + ZeroOrMore( + Choice(0, + Sequence( + Optional(NonTerminal("SPACE")), + NonTerminal("NEWLINE"), + NonTerminal("Words") + ), + Sequence( + NonTerminal("HardBreak"), + NonTerminal("Words") + ) + ) + ), + NonTerminal("NEWLINE"), + OneOrMore(NonTerminal("NEWLINE")), + End("complex") +).writeStandalone( + open(splitext(basename(__file__))[0] + ".svg", "w").write) diff --git a/def_PlainWords.py b/def_PlainWords.py new file mode 100755 index 0000000..42764bc --- /dev/null +++ b/def_PlainWords.py @@ -0,0 +1,18 @@ +#!/usr/bin/python3 + +# Copyright 2025, Jonas Smedegaard <dr@jones.dk> +# SPDX-License-Identifier: GPL-3+ + +from os.path import basename, splitext +from railroad import * + +Diagram( + Start("complex"), + NonTerminal("PRINTABLES"), + ZeroOrMore( + NonTerminal("SPACE"), + NonTerminal("PRINTABLES") + ), + End("complex") +).writeStandalone( + open(splitext(basename(__file__))[0] + ".svg", "w").write) diff --git a/def_SPACE.py b/def_SPACE.py new file mode 100755 index 0000000..9a6353b --- /dev/null +++ b/def_SPACE.py @@ -0,0 +1,14 @@ +#!/usr/bin/python3 + +# Copyright 2025, Jonas Smedegaard <dr@jones.dk> +# SPDX-License-Identifier: GPL-3+ + +from os.path import basename, splitext +from railroad import * + +Diagram( + Start("complex"), + "' '", + End("complex") +).writeStandalone( + open(splitext(basename(__file__))[0] + ".svg", "w").write) diff --git a/def_StyledWords.py b/def_StyledWords.py new file mode 100755 index 0000000..c87ae6a --- /dev/null +++ b/def_StyledWords.py @@ -0,0 +1,30 @@ +#!/usr/bin/python3 + +# Copyright 2025, Jonas Smedegaard <dr@jones.dk> +# SPDX-License-Identifier: GPL-3+ + +from os.path import basename, splitext +from railroad import 
* + +Diagram( + Start("complex"), + Choice(0, + Sequence( + "*", + NonTerminal("Words"), + "*" + ), + Sequence( + "**", + NonTerminal("Words"), + "**" + ), + Sequence( + "_", + NonTerminal("Words"), + "_" + ) + ), + End("complex") +).writeStandalone( + open(splitext(basename(__file__))[0] + ".svg", "w").write) diff --git a/def_Uri.py b/def_Uri.py new file mode 100755 index 0000000..79ef58a --- /dev/null +++ b/def_Uri.py @@ -0,0 +1,21 @@ +#!/usr/bin/python3 + +# Copyright 2025, Jonas Smedegaard <dr@jones.dk> +# SPDX-License-Identifier: GPL-3+ + +from os.path import basename, splitext +from railroad import * + +Diagram( + Start("complex"), + Choice(0, + Sequence( + "<", + NonTerminal("![<>] PRINTABLES"), + ">", + ), + NonTerminal("![<>] PRINTABLES"), + ), + End("complex") +).writeStandalone( + open(splitext(basename(__file__))[0] + ".svg", "w").write) diff --git a/def_Words.py b/def_Words.py new file mode 100755 index 0000000..cfc07dc --- /dev/null +++ b/def_Words.py @@ -0,0 +1,19 @@ +#!/usr/bin/python3 + +# Copyright 2025, Jonas Smedegaard <dr@jones.dk> +# SPDX-License-Identifier: GPL-3+ + +from os.path import basename, splitext +from railroad import * + +Diagram( + Start("complex"), + Choice(0, + NonTerminal("StyledWords"), + NonTerminal("LinkedWords"), + NonTerminal("AnnotatedWords"), + NonTerminal("PlainWords") + ), + End("complex") +).writeStandalone( + open(splitext(basename(__file__))[0] + ".svg", "w").write) @@ -275,6 +275,30 @@ file = {:Adobe2012 - Extensible Metadata Platform (XMP) Specification.pdf:PDF}, } +@Article{Ford2004, + author = {Bryan Ford}, + date = {2004-01}, + journaltitle = {ACM SIGPLAN Notices}, + title = {Parsing expression grammars}, + doi = {10.1145/982962.964011}, + issn = {1558-1160}, + number = {1}, + pages = {111--122}, + subtitle = {a recognition-based syntactic foundation}, + volume = {39}, + publisher = {Association for Computing Machinery (ACM)}, +} + +@Online{MacFarlane2014, + author = {John {MacFarlane}}, + date = {2014-10-25}, + 
title = {If anyone wants to contribute a BNF, please do! But I'm very skeptical that it can be done, due to}, + url = {https://github.com/commonmark/commonmark-spec/issues/113#issuecomment-60467783}, + organization = {GitHub, Inc.}, + titleaddon = {[Comment on code issue "Add BNF specification"]}, + urldate = {2025-05-17}, +} + @Comment{jabref-meta: databaseType:biblatex;} @Comment{jabref-meta: fileDirectory-jonas-bastian:/home/jonas/Projects/RUC/LIB/md;} @@ -67,6 +67,10 @@ are editorial notes not intented for inclusion in the final delivery.* {{< include _intro.qmd >}} +# Markdown and annotation + +{{< include _markdown.qmd >}} + # Analysis of existing framework {{< include _background.qmd >}} @@ -105,3 +109,12 @@ are editorial notes not intented for inclusion in the final delivery.* ```{.lua include="_extensions/ruc-play/semantic-markdown/semantic-markdown.lua" code-line-numbers="true"} ``` + +# Markdown syntax as PEG {.appendix #sec-def-peg} + +```{.peg include="def.peg" code-line-numbers="true"} +``` + +# Markdown syntax as syntax diagrams {.appendix #sec-def-dia} + +{{< include _def_dia.qmd >}} |
