diff options
| author | Jonas Smedegaard <dr@jones.dk> | 2025-05-26 08:43:35 +0200 |
|---|---|---|
| committer | Jonas Smedegaard <dr@jones.dk> | 2025-05-26 08:50:38 +0200 |
| commit | 1795ee0a7620f393142e9f259cf54fe798261ddb (patch) | |
| tree | 22f756eb7d7970276d7c8bb2aeb5b7184aa42eaa | |
| parent | c11ff0ea0262bfc1732197a86d3072f00373bdef (diff) | |
expand PEG definition; avoid too generic NAME
| -rw-r--r-- | _markdown.qmd | 17 | ||||
| -rw-r--r-- | syntax/def.peg | 56 |
2 files changed, 59 insertions, 14 deletions
diff --git a/_markdown.qmd b/_markdown.qmd index 5d9a15f..12a2180 100644 --- a/_markdown.qmd +++ b/_markdown.qmd @@ -217,8 +217,9 @@ with a dotted frame. `AnnotatedWords` can in principle contain any word, but in practice expects CSS id or class definitions, which means alphanumeric-only words prefixed by either dot or hash. -New higher prioritized syntaxes are added that should not clash with these, -for URI and CURIE words, +New higher prioritized syntaxes are added, +prioritized since that is simplest +and it should not cause clash with existing elements, as in @fig-def-extensions. *FIXME: mention and draw extended LinkedWordsX as well.* @@ -233,13 +234,13 @@ Syntax of `AnnotatedWords` and `LinkedWords`, extended with `SemWords`. The new `SemWords` are components in the RDF language, which is described further in @sec-rdf -either an angle-bracketed `Uri` or a `CURIE`. +either an angle-bracketed `Uri` or a `Curie`. Each component has an optional prefix to denote whether it is an RDF subject, predicate or object. (Again, these RDF terms are described further in @sec-rdf). See @fig-def-additions for their syntax diagrams. -*FIXME: mention and draw `Curie` and `NAME`* +*FIXME: mention and draw `Curie`* ::: {#fig-def-additions} @@ -247,7 +248,7 @@ See @fig-def-additions for their syntax diagrams.  -Syntax of `SemWords`, `Curie`, `SEMPREFIX` and `NAME`. +Syntax of `SemWords`, `Curie`, `SEMPREFIX`. ::: @@ -261,14 +262,14 @@ unlike Markdown in general but like link definition blocks. For a Markdown parser to cover the Markdown extension Semantic Markdown, it needs to cover the existing extension AnnotatedWords, -extended to contain URIs and CURIEs, +extended to include `Uri` and `Curie`, and it needs to cover AnnotatedWords not only immediately after Words, but also as leading or trailing Words for a block. Additionally, a new block type needs to be covered, similar to LinkDefinition but a simpler structure -with a CURIE as initial element. 
+with a `Curie` as initial element. These new Word and Block syntaxes should be prioritized, -as the restricted patterns tied to CURIEs is unlikely to collide +as the `Uri` and `Curie` patterns are unlikely to collide with existing Markdown or non-markup plain text. diff --git a/syntax/def.peg b/syntax/def.peg index e40610f..640958f 100644 --- a/syntax/def.peg +++ b/syntax/def.peg @@ -48,9 +48,53 @@ NEWLINE <- '\r\n' / '\r' # Semantic Markdown -AnnotatedWordsX <- '[' Words ']' - ('{' (SemWords / ![{}] PlainWords) '}') -SemWords <- SEMPREFIX '<' ![<>] PRINTABLES '>' - / SEMPREFIX Curie -Curie <- NAME? ':' NAME? -SEMPREFIX <- [.#] +# @see <https://source.jones.dk/semantic-markdown/about/> +AnnotatedWordsX <- '[' Words ']' + ('{' (SemWords / ![{}] PlainWords) '}') +SemWords <- SEMPREFIX '<' ![<>] PRINTABLES '>' + / SEMPREFIX Curie +SEMPREFIX <- [.#] + +# RDF CURIE +# @see <https://www.w3.org/TR/rdfa-core/#s_curies> +Curie <- (CuriePrefix? ':')? CurieReference +CuriePrefix <- NCName +CurieReference <- (IriPathAbsolute / IriPathRootless / IriPathEmpty) + ('?' IriQuery)? ('#' IriFragment)? + +# RFC3987 - Internationalized Resource Identifiers (IRIs) +# @see <https://tools.ietf.org/html/rfc3987> +IriPathAbsolute <- '/' (IriSegment ('/' IriSegment?)*)* +IriPathRootless <- IriSegment ('/' IriSegment?)* +IriPathEmpty <- !IriPathChar +IriSegment <- IriPathChar+ +IriQuery <- (IriPathChar / IriPrivate / '/' / '?')* +IriFragment <- (IriPathChar / '/' / '?')* +IriPathChar <- IriUnreserved / PctEncoded / SubDelims / ':' / '@' +IriUnreserved <- [A-Za-z] / [0-9] / '-' / '.' / '_' / '~' / UcsChar +PctEncoded <- '%' [0-9A-Fa-f] [0-9A-Fa-f] +SubDelims <- '!'
/ '$' / '&' / "'" / '(' / ')' + / '*' / '+' / ',' / ';' / '=' +UcsChar <- [#xA0-#xD7FF] / [#xF900-#xFDCF] / [#xFDF0-#xFFEF] + / [#x10000-#x1FFFD] / [#x20000-#x2FFFD] / [#x30000-#x3FFFD] + / [#x40000-#x4FFFD] / [#x50000-#x5FFFD] / [#x60000-#x6FFFD] + / [#x70000-#x7FFFD] / [#x80000-#x8FFFD] / [#x90000-#x9FFFD] + / [#xA0000-#xAFFFD] / [#xB0000-#xBFFFD] / [#xC0000-#xCFFFD] + / [#xD0000-#xDFFFD] / [#xE1000-#xEFFFD] +IriPrivate <- [#xE000-#xF8FF] / [#xF0000-#xFFFFD] / [#x100000-#x10FFFD] + +# XML NCName (an XML Name with no ':' at any position) +# @see <https://www.w3.org/TR/2009/REC-xml-names-20091208/#NT-NCName> +NCName <- !':' XMLNAMESTARTCHAR (!':' XMLNameChar)* + +# XML Name +# @see <https://www.w3.org/TR/REC-xml/#NT-Name> +XMLName <- XMLNAMESTARTCHAR XMLNameChar* +XMLNameChar <- XMLNAMESTARTCHAR / "-" / "." / [0-9] + / [#xB7] / [#x0300-#x036F] / [#x203F-#x2040] +XMLNAMESTARTCHAR <- ":" / "_" / [A-Z] / [a-z] + / [#xC0-#xD6] / [#xD8-#xF6] + / [#xF8-#x2FF] / [#x370-#x37D] / [#x37F-#x1FFF] + / [#x200C-#x200D] / [#x2070-#x218F] / [#x2C00-#x2FEF] + / [#x3001-#xD7FF] / [#xF900-#xFDCF] / [#xFDF0-#xFFFD] + / [#x10000-#xEFFFF] |
