From a85ed86b46b66d71db59085941d9feff94d79966 Mon Sep 17 00:00:00 2001 From: Jonas Smedegaard Date: Tue, 18 Mar 2025 09:16:15 +0100 Subject: rename Inlines() -> Statements(); add Namespaces(); rewrite and improve comments --- .../semantic-markdown/semantic-markdown.lua | 165 ++++++++++++++++----- 1 file changed, 127 insertions(+), 38 deletions(-) (limited to '_extensions') diff --git a/_extensions/ruc-play/semantic-markdown/semantic-markdown.lua b/_extensions/ruc-play/semantic-markdown/semantic-markdown.lua index f17f309..032bc05 100644 --- a/_extensions/ruc-play/semantic-markdown/semantic-markdown.lua +++ b/_extensions/ruc-play/semantic-markdown/semantic-markdown.lua @@ -3,30 +3,50 @@ --- SPDX-FileCopyrightText: 2025 Jonas Smedegaard --- SPDX-License-Identifier: GPL-3.0-or-later --- ---- simple example +--- ## Examples --- +--- Ideally, this text: +--- +--- ```Markdown+RDF +--- Simple ontological annotation: +--- [This]{foaf:depiction} is not a pipe. +--- +--- Nested, mixed-use and custom-namespaced annotations: +--- [[Ceci]{foaf:depiction} n'est pas une pipe.]{lang=fr bibo:Quote} +--- +--- {bibo}: http://purl.org/ontology/bibo/ --- ``` ---- "A [map]{foaf:depiction} is not the territory" ---- | || | ---- | |brace_open brace_close ---- | bracket_close ---- bracket_open --- ---- TODO: complex example with nesting and mixed-use enclosure +--- ...should with this filter be transformed to this text: --- ---- ["[Ceci]{foaf:depiction} n'est pas une pipe"{lang=fr dc:Text} ---- | | || | ---- | | |brace_open brace_close ---- | | bracket_close ---- | bracket_open +--- ```Markdown +--- --- +--- turtle: | +--- @prefix bibo: http://purl.org/ontology/bibo/ +--- +--- _:001 a foaf:depiction . +--- _:002 a foaf:depiction . +--- _:003 a bibo:Quote . +--- --- +--- Simple ontological annotation: +--- This is not a pipe. 
+--- +--- Nested, mixed-use and custom-namespaced annotations: +--- [Ceci n'est pas une pipe.]{lang=fr} --- ``` --- +--- When target document format is html, +--- this filter should ideally produce RDFa 1.1 Lite or Core data. +--- (Lite is *not* a subset of Core as it deviates slightly). +--- --- * v0.0.1 --- * initial release --- --- @version 0.0.1 --- @see --- @see +--- @see +--- @see -- TODO: maybe use topdown traversal -- * order of declaring annotations might matter (but should not) @@ -37,41 +57,94 @@ -- @see os.setlocale 'C' --- TODO: support Unicode +-- TODO: cover non-ASCII Unicode characters -- @see ---- qnameLong - RDF/turtle QName with prefix and name as set of chars ---- @see -local _nameStartChar = "A-Z_a-z" -local _nameChar = _nameStartChar.."-0-9" -local _name = "[".._nameStartChar.."][".._nameChar.."]*" -local _prefixName = "[".._nameStartChar.."_-][".._nameChar.."]*" -local qnameLong = _prefixName..":".._name +--- Curie_long - CURIE with prefix and reference as set of chars +--- @see +local _name_start_char = "A-Z_a-z" +local _name_char = _name_start_char.."-0-9" +local _reference = "[".._name_start_char.."][".._name_char.."]*" +local _prefix = "[".._name_start_char.."_-][".._name_char.."]*" +local curie_long = _prefix..":".._reference ---- qnamePrefixed - RDF/turtle QName with only prefix as set of chars -local qnamePrefixed = _prefixName..":" +--- curie_no_ref - CURIE with only prefix as set of chars +local curie_no_ref = _prefix..":" ---- qnameLocal - RDF/turtle QName with only name as set of chars -local qnameLocal = ":".._name +--- curie_local - CURIE with only name as set of chars +local curie_local = ":".._reference ---- qnameDefault - RDF/turtle QName without prefix or name as char -local qnameDefault = ":" +--- curie_default - CURIE without prefix or name as char +local curie_default = ":" --- TODO: qname - RDF/turtle QName as `LPeg.re` regex object --- TODO: test and replace above qname* patterns +-- TODO: curie_re - CURIE as `LPeg.re` 
regex object +-- TODO: test and replace above curie* patterns -- @see ---local qname_re = re.compile("(".._prefixName..")?:(".._name..")?") +--local curie_re = re.compile("(".._prefix..")?:(".._reference..")?") --- TODO: process Blocks (vocabulary blocks first) +-- FIXME: define RDF context same as RDFa +-- TODO: maybe support overriding context with a JSON-LD URI +-- @see ---- Extract and strip semantic annotations from inlines +--- Namespaces - process RDF namespace IRI declarations +--- +--- Takes as input a list of Para block elements. +--- For each block matching the pattern for a namespace IRI definition, +--- the declared namespace is extracted. +--- Returns an empty paragraph in case of a match, +--- or nothing (to signal preservation of original content). +--- +--- Example: +--- +--- ```Markdown +--- # Annotated paragraph using a custom namespace +--- +--- My favorite animal is the [Liger]{ov:preferredAnimal}. +--- +--- {ov}: http://open.vocab.org/terms/ +--- ``` +--- +--- @param blocks Markdown with ontological annotations as Blocks +--- @returns Markdown without ontological annotations as Blocks +--- @see +--- @see +local function Namespaces(blocks) + if #blocks.content == 3 + and blocks.content[1].t == "Str" + and blocks.content[2].t == "Space" + and blocks.content[3].t == "Str" + and blocks.content[1].text:match "^{"..curie_no_ref.."}%:%:$" + and blocks.content[3].text:match "^https?:" + then + -- FIXME: register namespace in Meta + return pandoc.Blocks {} + end +end + +--- Statements - process inline RDF statements --- --- This function is a Pandoc hook executed for each Inlines object --- when iterating through its Abstract Syntax Tree (AST) of a document. 
--- +--- ```ASCII-art +--- Simple ontological annotation: +--- "A [map]{foaf:depiction} is not the territory" +--- | || | +--- | |brace_open brace_close +--- | bracket_close +--- bracket_open +--- +--- Nested and mixed-use annotations: +--- ["[Ceci]{foaf:depiction} n'est pas une pipe"{lang=fr dc:Text} +--- | | || | +--- | | |brace_open brace_close +--- | | bracket_close +--- | bracket_open +--- ``` +--- --- @param inlines Markdown with semantic annotations as Inlines --- @returns Markdown stripped of semantic annotations as Inlines --- @see -function Inlines (inlines) +function Statements (inlines) -- positions of enclosure markers local bracket_open, bracket_close, brace_open, brace_close @@ -111,10 +184,10 @@ function Inlines (inlines) -- (ignore space-delimited enclosures: not in spec for inlines) -- completes a brace enclosure - -- TODO: support mixed-use enclosure (non-qname enclosure content) - -- TODO: cover qnamePrefix and qnameLocal and qnameDefault + -- TODO: support mixed-use enclosure + -- TODO: cover curie_prefix and curie_local and curie_default if braced then - _, brace_close = string.find(el.text, "^"..qnameLong.."}", + _, brace_close = string.find(el.text, "^"..curie_long.."}", bracket_close) if brace_close then has_hints = true @@ -123,7 +196,23 @@ function Inlines (inlines) end ::continue:: end - if has_hints then - return pandoc.Inlines {new_inlines} - end +-- FIXME +-- if has_hints then +-- return pandoc.Inlines {new_inlines} +-- end end + +-- First resolve namespace declarations, then statements. +-- +-- Although this filter is *not* a full RDF parser, +-- e.g. namespace resolving is similar to other RDF formats +-- with detailed documented process ordering. +-- @see +return { + {Para = Namespaces}, + {Inlines = Statements} + -- FIXME: add custom declared namespaces in Meta + -- TODO: maybe add only actively used namespaces + -- (do same as for unused link definitions) + --{Meta = NamespacesToMeta}, +} -- cgit v1.2.3