From 75d54ad0c5d9bdcb02e9934401f1a23bc0645de7 Mon Sep 17 00:00:00 2001 From: Jonas Smedegaard Date: Mon, 26 May 2025 10:18:42 +0200 Subject: add syntax PrefixDefinition; improve text on Markdown --- _markdown.qmd | 50 ++++++++++++++++++++++++++---------------- _syntax.qmd | 4 ++++ syntax/def.peg | 1 + syntax/def_BlockX.py | 17 ++++++++++++++ syntax/def_PrefixDefinition.py | 17 ++++++++++++++ 5 files changed, 70 insertions(+), 19 deletions(-) create mode 100755 syntax/def_BlockX.py create mode 100755 syntax/def_PrefixDefinition.py diff --git a/_markdown.qmd b/_markdown.qmd index d038cc8..542b175 100644 --- a/_markdown.qmd +++ b/_markdown.qmd @@ -233,7 +233,7 @@ Syntax of `AnnotatedWords` and `LinkedWords`, extended with `SemWords`. ::: The new `SemWords` are components in the RDF language, -which is described further in @sec-rdf +which is described further in @sec-rdf; either an angle-bracketed `Uri` or a `Curie`. Each component has an optional prefix to denote whether it is an RDF subject, predicate or object. @@ -254,24 +254,36 @@ Syntax of `SemWords`, `SemWord`, `Curie` and `SEMPREFIX`. ::: -## Expectations of processors +## Suggestions for processors -Parsing should be human-friendly, +The purpose of these analyses is parsing, +as covered in the following chapters. +To conclude, here follow some general suggestions +for parsing the Markdown extension Semantic Markdown. + +For a parser of existing Markdown to be extended +to cover the Markdown extension Semantic Markdown, +it needs to cover the existing extension `AnnotatedWords`, +extended to contain `Uri` and `Curie`, +and `AnnotatedWords` should be permitted not only immediately after Words, +but also as initial or final `Words` in a block. +Additionally, the new block type `PrefixDefinition` needs to be covered, +similar to `LinkDefinition` but a simpler structure +with `Curie` as the initial element. + +Parsing of Semantic Markdown should be human-friendly, in the spirit of Markdown (see @sec-spirit).
-This may translate to the annotations being dropped, -unlike Markdown in general but like link definition blocks. - -For a Markdown parser to cover the Markdown extension Semantic Markdown, -it needs to cover the existing extension AnnotatedWords, -extended to include `Uri` and `Curie`, -and it needs to cover AnnotatedWords not only immediately after Words, -but also as leading or trailing Words for a block. - -Additionally, a new block type needs to be covered, -similar to LinkDefinition but a simpler structure -with a `Curie` as initial element. - -These new Word and Block syntaxes should be prioritized, -as the `Uri` and `Curie` patterns are unlikely to collide -with existing Markdown or non-markup plain text. +For properly matched annotations, +this translates to the markup being removed from content, +similar to the removal of `LinkTarget` inlines and `LinkDefinition` blocks +in core Markdown, +and non-semantic `AnnotatedWords` in some dialects. +Non-matched data should be preserved, treated as content. + +These extended or new `Word` and `Block` syntaxes -- +`AnnotatedWords` and `PrefixDefinition` -- +should be safe to prioritize when parsing, +because the predominantly involved `Curie` pattern should not collide +with any existing Markdown, +and is unlikely to appear in natural language text. diff --git a/_syntax.qmd b/_syntax.qmd index 0dd6204..e0bfb4e 100644 --- a/_syntax.qmd +++ b/_syntax.qmd @@ -6,12 +6,16 @@ regular expressions and the negative predicate from PEG notation.
![Block](syntax/def_Block.svg) +![Block with Semantic Markdown](syntax/def_BlockX.svg) + ![Header](syntax/def_Header.svg) ![List](syntax/def_List.svg) ![LinkDefinition](syntax/def_LinkDefinition.svg) +![PrefixDefinition](syntax/def_PrefixDefinition.svg) + ![Paragraph](syntax/def_Paragraph.svg) ![Words](syntax/def_Words.svg) diff --git a/syntax/def.peg b/syntax/def.peg index 11ce1ab..ab809ac 100644 --- a/syntax/def.peg +++ b/syntax/def.peg @@ -49,6 +49,7 @@ NEWLINE <- '\r\n' # Semantic Markdown # @see +PrefixDefinition <- '{' CuriePrefix '}' ':' _? SemWord NEWLINE AnnotatedWordsX <- '[' Words ']' ('{' (SemWords / ![{}] PlainWords) '}') SemWords <- SemWord (_ SemWord)* diff --git a/syntax/def_BlockX.py b/syntax/def_BlockX.py new file mode 100755 index 0000000..da38385 --- /dev/null +++ b/syntax/def_BlockX.py @@ -0,0 +1,17 @@ +#!/usr/bin/python3 + +# Copyright 2025, Jonas Smedegaard +# SPDX-License-Identifier: GPL-3+ + +from railroad import * + +Diagram( + Choice(0, + NonTerminal("Header"), + NonTerminal("List"), + Group(NonTerminal("PrefixDefinition")), + NonTerminal("LinkDefinition"), + NonTerminal("Paragraph") + ), + type="complex" +).writeStandalone(sys.stdout.write) diff --git a/syntax/def_PrefixDefinition.py b/syntax/def_PrefixDefinition.py new file mode 100755 index 0000000..c0e108a --- /dev/null +++ b/syntax/def_PrefixDefinition.py @@ -0,0 +1,17 @@ +#!/usr/bin/python3 + +# Copyright 2025, Jonas Smedegaard +# SPDX-License-Identifier: GPL-3+ + +from railroad import * + +Diagram( + "{", + NonTerminal("CuriePrefix"), + "}", + ":", + Choice(0, NonTerminal("_"), Skip()), + NonTerminal("SemWord"), + NonTerminal("NEWLINE"), + type="complex" +).writeStandalone(sys.stdout.write) -- cgit v1.2.3