diff options
| author | Jonas Smedegaard <dr@jones.dk> | 2025-03-18 09:16:15 +0100 |
|---|---|---|
| committer | Jonas Smedegaard <dr@jones.dk> | 2025-03-18 09:17:51 +0100 |
| commit | a85ed86b46b66d71db59085941d9feff94d79966 (patch) | |
| tree | d120f6cf242e0883857ca0f18472f0c3cb9eeb7d /_extensions | |
| parent | 19f090af9b8076d88926881030759682afbb14bd (diff) | |
rename Inlines() -> Statements(); add Namespaces(); rewrite and improve comments
Diffstat (limited to '_extensions')
| -rw-r--r-- | _extensions/ruc-play/semantic-markdown/semantic-markdown.lua | 165 |
1 files changed, 127 insertions, 38 deletions
diff --git a/_extensions/ruc-play/semantic-markdown/semantic-markdown.lua b/_extensions/ruc-play/semantic-markdown/semantic-markdown.lua index f17f309..032bc05 100644 --- a/_extensions/ruc-play/semantic-markdown/semantic-markdown.lua +++ b/_extensions/ruc-play/semantic-markdown/semantic-markdown.lua @@ -3,30 +3,50 @@ --- SPDX-FileCopyrightText: 2025 Jonas Smedegaard <dr@jones.dk> --- SPDX-License-Identifier: GPL-3.0-or-later --- ---- simple example +--- ## Examples --- +--- Ideally, this text: +--- +--- ```Markdown+RDF +--- Simple ontological annotation: +--- [This]{foaf:depiction} is not a pipe. +--- +--- Nested, mixed-use and custom-namespaced annotations: +--- [[Ceci]{foaf:depiction} n'est pas une pipe.]{lang=fr bibo:Quote} +--- +--- {bibo}: http://purl.org/ontology/bibo/ --- ``` ---- "A [map]{foaf:depiction} is not the territory" ---- | || | ---- | |brace_open brace_close ---- | bracket_close ---- bracket_open --- ---- TODO: complex example with nesting and mixed-use enclosure +--- ...should with this filter be transformed to this text: --- ---- ["[Ceci]{foaf:depiction} n'est pas une pipe"{lang=fr dc:Text} ---- | | || | ---- | | |brace_open brace_close ---- | | bracket_close ---- | bracket_open +--- ```Markdown +--- --- +--- turtle: | +--- @prefix bibo: http://purl.org/ontology/bibo/ +--- +--- _:001 a foaf:depiction . +--- _:002 a foaf:depiction . +--- _:003 a bibo:Quote . +--- --- +--- Simple ontological annotation: +--- This is not a pipe. +--- +--- Nested, mixed-use and custom-namespaced annotations: +--- [Ceci n'est pas une pipe.]{lang=fr} --- ``` --- +--- When target document format is html, +--- this filter should ideally produce RDFa 1.1 Lite or Core data. +--- (Lite is *not* a subset of Core as it deviates slightly). 
+--- --- * v0.0.1 --- * initial release --- --- @version 0.0.1 --- @see <https://source.jones.dk/semantic-markdown/about/> --- @see <https://moodle.ruc.dk/course/view.php?id=23505> +--- @see <https://www.w3.org/TR/rdfa-primer/#using-rdfa> +--- @see <https://www.ctrl.blog/entry/rdfa-link-attributes.html> -- TODO: maybe use topdown traversal -- * order of declaring annotations might matter (but should not) @@ -37,41 +57,94 @@ -- @see <https://pandoc.org/lua-filters.html#common-pitfalls> os.setlocale 'C' --- TODO: support Unicode +-- TODO: cover non-ASCII Unicode characters -- @see <https://www.lua.org/manual/5.4/manual.html#6.5> ---- qnameLong - RDF/turtle QName with prefix and name as set of chars ---- @see <https://www.w3.org/TeamSubmission/turtle/#name> -local _nameStartChar = "A-Z_a-z" -local _nameChar = _nameStartChar.."-0-9" -local _name = "[".._nameStartChar.."][".._nameChar.."]*" -local _prefixName = "[".._nameStartChar.."_-][".._nameChar.."]*" -local qnameLong = _prefixName..":".._name +--- Curie_long - CURIE with prefix and reference as set of chars +--- @see <https://www.w3.org/TR/rdfa-core/#h-s_curies> +local _name_start_char = "A-Z_a-z" +local _name_char = _name_start_char.."-0-9" +local _reference = "[".._name_start_char.."][".._name_char.."]*" +local _prefix = "[".._name_start_char.."_-][".._name_char.."]*" +local curie_long = _prefix..":".._reference ---- qnamePrefixed - RDF/turtle QName with only prefix as set of chars -local qnamePrefixed = _prefixName..":" +--- curie_no_ref - CURIE with only prefix as set of chars +local curie_no_ref = _prefix..":" ---- qnameLocal - RDF/turtle QName with only name as set of chars -local qnameLocal = ":".._name +--- curie_local - CURIE with only name as set of chars +local curie_local = ":".._reference ---- qnameDefault - RDF/turtle QName without prefix or name as char -local qnameDefault = ":" +--- curie_default - CURIE without prefix or name as char +local curie_default = ":" --- TODO: qname - RDF/turtle QName as 
`LPeg.re` regex object --- TODO: test and replace above qname* patterns +-- TODO: curie_re - CURIE as `LPeg.re` regex object +-- TODO: test and replace above curie* patterns -- @see <https://pandoc.org/lua-filters.html#global-variables> ---local qname_re = re.compile("(".._prefixName..")?:(".._name..")?") +--local curie_re = re.compile("(".._prefix..")?:(".._reference..")?") --- TODO: process Blocks (vocabulary blocks first) +-- FIXME: define RDF context same as RDFa +-- TODO: maybe support overriding context with a JSON-LD URI +-- @see <https://www.w3.org/2011/rdfa-context/rdfa-1.1> ---- Extract and strip semantic annotations from inlines +--- Namespaces - process RDF namespace IRI declarations +--- +--- Takes as input a list of Para block elements. +--- For each block matching the pattern for a namespace IRI definition, +--- the declared namespace is extracted. +--- Returns an empty paragraph in case of a match, +--- or nothing (to signal preservation of original content). +--- +--- Example: +--- +--- ```Markdown +--- # Annotated paragraph using a custom namespace +--- +--- My favorite animal is the [Liger]{ov:preferredAnimal}. 
+--- +--- {ov}: http://open.vocab.org/terms/ +--- ``` +--- +--- @param blocks Markdown with ontological annotations as Blocks +--- @returns Markdown without ontological annotations as Blocks +--- @see <https://pandoc.org/lua-filters.html#type-blocks> +--- @see <https://www.w3.org/TR/rdf12-concepts/#vocabularies> +local function Namespaces(blocks) + if #blocks.content == 3 + and blocks.content[1].t == "Str" + and blocks.content[2].t == "Space" + and blocks.content[3].t == "Str" + and blocks.content[1].text:match "^{"..curie_no_ref.."}%:%:$" + and blocks.content[3].text:match "^https?:" + then + -- FIXME: register namespace in Meta + return pandoc.Blocks {} + end +end + +--- Statements - process inline RDF statements --- --- This function is a Pandoc hook executed for each Inlines object --- when iterating through its Abstract Syntax Tree (AST) of a document. --- +--- ```ASCII-art +--- Simple ontological annotation: +--- "A [map]{foaf:depiction} is not the territory" +--- | || | +--- | |brace_open brace_close +--- | bracket_close +--- bracket_open +--- +--- Nested and mixed-use annotations: +--- ["[Ceci]{foaf:depiction} n'est pas une pipe"{lang=fr dc:Text} +--- | | || | +--- | | |brace_open brace_close +--- | | bracket_close +--- | bracket_open +--- ``` +--- --- @param inlines Markdown with semantic annotations as Inlines --- @returns Markdown stripped of semantic annotations as Inlines --- @see <https://pandoc.org/lua-filters.html#type-inline> -function Inlines (inlines) +function Statements (inlines) -- positions of enclosure markers local bracket_open, bracket_close, brace_open, brace_close @@ -111,10 +184,10 @@ function Inlines (inlines) -- (ignore space-delimited enclosures: not in spec for inlines) -- completes a brace enclosure - -- TODO: support mixed-use enclosure (non-qname enclosure content) - -- TODO: cover qnamePrefix and qnameLocal and qnameDefault + -- TODO: support mixed-use enclosure + -- TODO: cover curie_prefix and curie_local and curie_default if 
braced then - _, brace_close = string.find(el.text, "^"..qnameLong.."}", + _, brace_close = string.find(el.text, "^"..curie_long.."}", bracket_close) if brace_close then has_hints = true @@ -123,7 +196,23 @@ function Inlines (inlines) end ::continue:: end - if has_hints then - return pandoc.Inlines {new_inlines} - end +-- FIXME +-- if has_hints then +-- return pandoc.Inlines {new_inlines} +-- end end + +-- First resolve namespace declarations, then statements. +-- +-- Although this filter is *not* a full RDF parser, +-- e.g. namespace resolving is similar to other RDF formats +-- with detailed documented process ordering. +-- @see <https://www.w3.org/TR/turtle/#sec-parsing> +return { + {Para = Namespaces}, + {Inlines = Statements} + -- FIXME: add custom declared namespaces in Meta + -- TODO: maybe add only actively used namespaces + -- (do same as for unused link definitions) + --{Meta = NamespacesToMeta}, +} |
