aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJonas Smedegaard <dr@jones.dk>2025-05-26 08:43:35 +0200
committerJonas Smedegaard <dr@jones.dk>2025-05-26 08:50:38 +0200
commit1795ee0a7620f393142e9f259cf54fe798261ddb (patch)
tree22f756eb7d7970276d7c8bb2aeb5b7184aa42eaa
parentc11ff0ea0262bfc1732197a86d3072f00373bdef (diff)
expand PEG definition; avoid too generic NAME
-rw-r--r--_markdown.qmd17
-rw-r--r--syntax/def.peg56
2 files changed, 59 insertions, 14 deletions
diff --git a/_markdown.qmd b/_markdown.qmd
index 5d9a15f..12a2180 100644
--- a/_markdown.qmd
+++ b/_markdown.qmd
@@ -217,8 +217,9 @@ with a dotted frame.
`AnnotatedWords` can in principle contain any word,
but in practice expects CSS id or class definitions,
which means alphanumeric-only words prefixed by either dot or hash.
-New higher prioritized syntaxes are added that should not clash with these,
-for URI and CURIE words,
+New higher-priority syntaxes are added,
+prioritized because that is simplest
+and should not cause clashes with existing elements,
as in @fig-def-extensions.
*FIXME: mention and draw extended LinkedWordsX as well.*
@@ -233,13 +234,13 @@ Syntax of `AnnotatedWords` and `LinkedWords`, extended with `SemWords`.
The new `SemWords` are components in the RDF language,
which is described further in @sec-rdf
-either an angle-bracketed `Uri` or a `CURIE`.
+either an angle-bracketed `Uri` or a `Curie`.
Each component has an optional prefix
to denote whether it is an RDF subject, predicate or object.
(Again, these RDF terms are described further in @sec-rdf).
See @fig-def-additions for their syntax diagrams.
-*FIXME: mention and draw `Curie` and `NAME`*
+*FIXME: mention and draw `Curie`*
::: {#fig-def-additions}
@@ -247,7 +248,7 @@ See @fig-def-additions for their syntax diagrams.
![](syntax/def_SEMPREFIX.svg)
-Syntax of `SemWords`, `Curie`, `SEMPREFIX` and `NAME`.
+Syntax of `SemWords`, `Curie` and `SEMPREFIX`.
:::
@@ -261,14 +262,14 @@ unlike Markdown in general but like link definition blocks.
For a Markdown parser to cover the Markdown extension Semantic Markdown,
it needs to cover the existing extension AnnotatedWords,
-extended to contain URIs and CURIEs,
+extended to include `Uri` and `Curie`,
and it needs to cover AnnotatedWords not only immediately after Words,
but also as leading or trailing Words for a block.
Additionally, a new block type needs to be covered,
similar to LinkDefinition but a simpler structure
-with a CURIE as initial element.
+with a `Curie` as initial element.
These new Word and Block syntaxes should be prioritized,
-as the restricted patterns tied to CURIEs is unlikely to collide
+as the `Uri` and `Curie` patterns are unlikely to collide
with existing Markdown or non-markup plain text.
diff --git a/syntax/def.peg b/syntax/def.peg
index e40610f..640958f 100644
--- a/syntax/def.peg
+++ b/syntax/def.peg
@@ -48,9 +48,53 @@ NEWLINE <- '\r\n'
/ '\r'
# Semantic Markdown
-AnnotatedWordsX <- '[' Words ']'
- ('{' (SemWords / ![{}] PlainWords) '}')
-SemWords <- SEMPREFIX '<' ![<>] PRINTABLES '>'
- / SEMPREFIX Curie
-Curie <- NAME? ':' NAME?
-SEMPREFIX <- [.#]
+# Annotated words, extended for Semantic Markdown:
+# bracketed Words followed by a braced annotation, which is tried as
+# SemWords first and otherwise as PlainWords not starting with a brace.
+# @see <https://source.jones.dk/semantic-markdown/about/>
+AnnotatedWordsX <- '[' Words ']'
+ ('{' (SemWords / ![{}] PlainWords) '}')
+# SemWords: a SEMPREFIX followed by either an angle-bracketed form
+# or a Curie. The lookahead only rejects '<' or '>' as the first character.
+# NOTE(review): PRINTABLES is defined outside this excerpt; whether it can
+# itself consume the closing '>' is not visible here - confirm.
+SemWords <- SEMPREFIX '<' ![<>] PRINTABLES '>'
+ / SEMPREFIX Curie
+# SEMPREFIX: exactly one dot or hash character.
+SEMPREFIX <- [.#]
+
+# RDF CURIE
+# An optional "prefix:" part (the prefix itself may be omitted, leaving a
+# bare ':') followed by a reference built from the IRI path, query and
+# fragment rules.
+# NOTE(review): IriPathChar also accepts ':', so a prefix is only split off
+# when CuriePrefix stops in front of the separating colon.
+# @see <https://www.w3.org/TR/rdfa-core/#s_curies>
+Curie <- (CuriePrefix? ':')? CurieReference
+CuriePrefix <- NCName
+# Path alternatives are tried in order: absolute, rootless, then empty.
+CurieReference <- (IriPathAbsolute / IriPathRootless / IriPathEmpty)
+ ('?' IriQuery)? ('#' IriFragment)?
+
+# RFC3987 - Internationalized Resource Identifiers (IRIs)
+# Only the path, query and fragment productions used by CurieReference.
+# @see <https://tools.ietf.org/html/rfc3987>
+# ipath-absolute = "/" [ isegment-nz *( "/" isegment ) ]
+# (the bracketed part is optional, hence '?' rather than a repetition)
+IriPathAbsolute <- '/' (IriSegment ('/' IriSegment?)*)?
+# ipath-rootless = isegment-nz *( "/" isegment )
+IriPathRootless <- IriSegment ('/' IriSegment?)*
+# ipath-empty: succeeds without consuming input when no path character follows
+IriPathEmpty <- !IriPathChar
+# Non-empty segment; possibly-empty segments are written "IriSegment?" above
+IriSegment <- IriPathChar+
+IriQuery <- (IriPathChar / IriPrivate / '/' / '?')*
+IriFragment <- (IriPathChar / '/' / '?')*
+IriPathChar <- IriUnreserved / PctEncoded / SubDelims / ':' / '@'
+IriUnreserved <- [A-Za-z] / [0-9] / '-' / '.' / '_' / '~' / UcsChar
+# Hex digits are case-insensitive (RFC 3986 section 2.1), so "%c3%a9" is
+# as valid as "%C3%A9"
+PctEncoded <- '%' [0-9A-Fa-f] [0-9A-Fa-f]
+SubDelims <- '!' / '$' / '&' / "'" / '(' / ')'
+ / '*' / '+' / ',' / ';' / '='
+# ucschar: non-ASCII characters allowed in IRIs
+UcsChar <- [#xA0-#xD7FF] / [#xF900-#xFDCF] / [#xFDF0-#xFFEF]
+ / [#x10000-#x1FFFD] / [#x20000-#x2FFFD] / [#x30000-#x3FFFD]
+ / [#x40000-#x4FFFD] / [#x50000-#x5FFFD] / [#x60000-#x6FFFD]
+ / [#x70000-#x7FFFD] / [#x80000-#x8FFFD] / [#x90000-#x9FFFD]
+ / [#xA0000-#xAFFFD] / [#xB0000-#xBFFFD] / [#xC0000-#xCFFFD]
+ / [#xD0000-#xDFFFD] / [#xE1000-#xEFFFD]
+# iprivate: private-use code points, permitted in queries only
+IriPrivate <- [#xE000-#xF8FF] / [#xF0000-#xFFFFD] / [#x100000-#x10FFFD]
+
+# XML NCName: an XML Name that contains no colon at all
+# @see <https://www.w3.org/TR/2009/REC-xml-names-20091208/#NT-NCName>
+# The colon is excluded for every character, not just the first one:
+# otherwise the name would swallow the ':' that ends a CURIE prefix.
+NCName <- (!':' XMLNAMESTARTCHAR) (!':' XMLNameChar)*
+
+# XML Name: one name-start character followed by any number of name characters
+# @see <https://www.w3.org/TR/REC-xml/#NT-Name>
+XMLName <- XMLNAMESTARTCHAR XMLNameChar*
+# Name characters: every name-start character plus '-', '.', digits,
+# the middle dot and the combining / connector ranges.
+# The single code point is bracketed like all other code points in this
+# file, since a bare '#' would begin a comment.
+XMLNameChar <- XMLNAMESTARTCHAR / "-" / "." / [0-9]
+ / [#xB7] / [#x0300-#x036F] / [#x203F-#x2040]
+# Characters permitted at the start of an XML Name
+XMLNAMESTARTCHAR <- ":" / "_" / [A-Z] / [a-z]
+ / [#xC0-#xD6] / [#xD8-#xF6]
+ / [#xF8-#x2FF] / [#x370-#x37D] / [#x37F-#x1FFF]
+ / [#x200C-#x200D] / [#x2070-#x218F] / [#x2C00-#x2FEF]
+ / [#x3001-#xD7FF] / [#xF900-#xFDCF] / [#xFDF0-#xFFFD]
+ / [#x10000-#xEFFFF]