aboutsummaryrefslogtreecommitdiff
path: root/_extensions/ruc-play/semantic-markdown/semantic-markdown.lua
diff options
context:
space:
mode:
authorJonas Smedegaard <dr@jones.dk>2025-03-18 09:16:15 +0100
committerJonas Smedegaard <dr@jones.dk>2025-03-18 09:17:51 +0100
commita85ed86b46b66d71db59085941d9feff94d79966 (patch)
treed120f6cf242e0883857ca0f18472f0c3cb9eeb7d /_extensions/ruc-play/semantic-markdown/semantic-markdown.lua
parent19f090af9b8076d88926881030759682afbb14bd (diff)
rename Inlines() -> Statements(); add Namespaces(); rewrite and improve comments
Diffstat (limited to '_extensions/ruc-play/semantic-markdown/semantic-markdown.lua')
-rw-r--r--_extensions/ruc-play/semantic-markdown/semantic-markdown.lua165
1 files changed, 127 insertions, 38 deletions
diff --git a/_extensions/ruc-play/semantic-markdown/semantic-markdown.lua b/_extensions/ruc-play/semantic-markdown/semantic-markdown.lua
index f17f309..032bc05 100644
--- a/_extensions/ruc-play/semantic-markdown/semantic-markdown.lua
+++ b/_extensions/ruc-play/semantic-markdown/semantic-markdown.lua
@@ -3,30 +3,50 @@
--- SPDX-FileCopyrightText: 2025 Jonas Smedegaard <dr@jones.dk>
--- SPDX-License-Identifier: GPL-3.0-or-later
---
---- simple example
+--- ## Examples
---
+--- Ideally, this text:
+---
+--- ```Markdown+RDF
+--- Simple ontological annotation:
+--- [This]{foaf:depiction} is not a pipe.
+---
+--- Nested, mixed-use and custom-namespaced annotations:
+--- [[Ceci]{foaf:depiction} n'est pas une pipe.]{lang=fr bibo:Quote}
+---
+--- {bibo}: http://purl.org/ontology/bibo/
--- ```
---- "A [map]{foaf:depiction} is not the territory"
---- | || |
---- | |brace_open brace_close
---- | bracket_close
---- bracket_open
---
---- TODO: complex example with nesting and mixed-use enclosure
+--- ...should with this filter be transformed to this text:
---
---- ["[Ceci]{foaf:depiction} n'est pas une pipe"{lang=fr dc:Text}
---- | | || |
---- | | |brace_open brace_close
---- | | bracket_close
---- | bracket_open
+--- ```Markdown
+--- ---
+--- turtle: |
+--- @prefix bibo: http://purl.org/ontology/bibo/
+---
+--- _:001 a foaf:depiction .
+--- _:002 a foaf:depiction .
+--- _:003 a bibo:Quote .
+--- ---
+--- Simple ontological annotation:
+--- This is not a pipe.
+---
+--- Nested, mixed-use and custom-namespaced annotations:
+--- [Ceci n'est pas une pipe.]{lang=fr}
--- ```
---
+--- When target document format is html,
+--- this filter should ideally produce RDFa 1.1 Lite or Core data.
+--- (Lite is *not* a subset of Core as it deviates slightly).
+---
--- * v0.0.1
--- * initial release
---
--- @version 0.0.1
--- @see <https://source.jones.dk/semantic-markdown/about/>
--- @see <https://moodle.ruc.dk/course/view.php?id=23505>
+--- @see <https://www.w3.org/TR/rdfa-primer/#using-rdfa>
+--- @see <https://www.ctrl.blog/entry/rdfa-link-attributes.html>
-- TODO: maybe use topdown traversal
-- * order of declaring annotations might matter (but should not)
@@ -37,41 +57,94 @@
-- @see <https://pandoc.org/lua-filters.html#common-pitfalls>
os.setlocale 'C'
--- TODO: support Unicode
+-- TODO: cover non-ASCII Unicode characters
-- @see <https://www.lua.org/manual/5.4/manual.html#6.5>
---- qnameLong - RDF/turtle QName with prefix and name as set of chars
---- @see <https://www.w3.org/TeamSubmission/turtle/#name>
-local _nameStartChar = "A-Z_a-z"
-local _nameChar = _nameStartChar.."-0-9"
-local _name = "[".._nameStartChar.."][".._nameChar.."]*"
-local _prefixName = "[".._nameStartChar.."_-][".._nameChar.."]*"
-local qnameLong = _prefixName..":".._name
+--- curie_long - CURIE with prefix and reference as set of chars
+--- @see <https://www.w3.org/TR/rdfa-core/#h-s_curies>
+local _name_start_char = "A-Z_a-z"
+local _name_char = _name_start_char.."-0-9"
+local _reference = "[".._name_start_char.."][".._name_char.."]*"
+local _prefix = "[".._name_start_char.."_-][".._name_char.."]*"
+local curie_long = _prefix..":".._reference
---- qnamePrefixed - RDF/turtle QName with only prefix as set of chars
-local qnamePrefixed = _prefixName..":"
+--- curie_no_ref - CURIE with only prefix as set of chars
+local curie_no_ref = _prefix..":"
---- qnameLocal - RDF/turtle QName with only name as set of chars
-local qnameLocal = ":".._name
+--- curie_local - CURIE with only name as set of chars
+local curie_local = ":".._reference
---- qnameDefault - RDF/turtle QName without prefix or name as char
-local qnameDefault = ":"
+--- curie_default - CURIE without prefix or name as char
+local curie_default = ":"
--- TODO: qname - RDF/turtle QName as `LPeg.re` regex object
--- TODO: test and replace above qname* patterns
+-- TODO: curie_re - CURIE as `LPeg.re` regex object
+-- TODO: test and replace above curie* patterns
-- @see <https://pandoc.org/lua-filters.html#global-variables>
---local qname_re = re.compile("(".._prefixName..")?:(".._name..")?")
+--local curie_re = re.compile("(".._prefix..")?:(".._reference..")?")
--- TODO: process Blocks (vocabulary blocks first)
+-- FIXME: define RDF context same as RDFa
+-- TODO: maybe support overriding context with a JSON-LD URI
+-- @see <https://www.w3.org/2011/rdfa-context/rdfa-1.1>
---- Extract and strip semantic annotations from inlines
+--- Namespaces - process RDF namespace IRI declarations
+---
+--- Pandoc hook executed for each Para block element.  On a match with
+--- the namespace IRI definition pattern, returns an empty list of blocks
+--- (deleting the paragraph); otherwise returns nothing (content preserved).
+--- NOTE(review): pattern expects `{ov:}::`, not `{ov}:` as shown below.
+---
+--- Example:
+---
+--- ```Markdown
+--- # Annotated paragraph using a custom namespace
+---
+--- My favorite animal is the [Liger]{ov:preferredAnimal}.
+---
+--- {ov}: http://open.vocab.org/terms/
+--- ```
+---
+--- @param blocks a single Para block element (despite the plural name)
+--- @returns empty Blocks on a match, otherwise nil
+--- @see <https://pandoc.org/lua-filters.html#type-blocks>
+--- @see <https://www.w3.org/TR/rdf12-concepts/#vocabularies>
+local function Namespaces(blocks)
+  -- parenthesized calls: `s:match "a"..b` would parse as `(s:match "a")..b`
+  if #blocks.content == 3
+    and blocks.content[1].t == "Str"
+    and blocks.content[2].t == "Space"
+    and blocks.content[3].t == "Str"
+    and blocks.content[1].text:match("^{"..curie_no_ref.."}%:%:$")
+    and blocks.content[3].text:match("^https?:")
+  then
+    -- FIXME: register namespace in Meta
+    return pandoc.Blocks {}
+  end
+end
+
+--- Statements - process inline RDF statements
---
--- This function is a Pandoc hook executed for each Inlines object
--- when iterating through its Abstract Syntax Tree (AST) of a document.
---
+--- ```ASCII-art
+--- Simple ontological annotation:
+--- "A [map]{foaf:depiction} is not the territory"
+--- | || |
+--- | |brace_open brace_close
+--- | bracket_close
+--- bracket_open
+---
+--- Nested and mixed-use annotations:
+--- ["[Ceci]{foaf:depiction} n'est pas une pipe"{lang=fr dc:Text}
+--- | | || |
+--- | | |brace_open brace_close
+--- | | bracket_close
+--- | bracket_open
+--- ```
+---
--- @param inlines Markdown with semantic annotations as Inlines
--- @returns Markdown stripped of semantic annotations as Inlines
--- @see <https://pandoc.org/lua-filters.html#type-inline>
-function Inlines (inlines)
+function Statements (inlines)
-- positions of enclosure markers
local bracket_open, bracket_close, brace_open, brace_close
@@ -111,10 +184,10 @@ function Inlines (inlines)
-- (ignore space-delimited enclosures: not in spec for inlines)
-- completes a brace enclosure
- -- TODO: support mixed-use enclosure (non-qname enclosure content)
- -- TODO: cover qnamePrefix and qnameLocal and qnameDefault
+ -- TODO: support mixed-use enclosure
+ -- TODO: cover curie_no_ref and curie_local and curie_default
if braced then
- _, brace_close = string.find(el.text, "^"..qnameLong.."}",
+ _, brace_close = string.find(el.text, "^"..curie_long.."}",
bracket_close)
if brace_close then
has_hints = true
@@ -123,7 +196,23 @@ function Inlines (inlines)
end
::continue::
end
- if has_hints then
- return pandoc.Inlines {new_inlines}
- end
+-- FIXME
+-- if has_hints then
+-- return pandoc.Inlines {new_inlines}
+-- end
end
+
+-- First resolve namespace declarations, then statements.
+--
+-- Although this filter is *not* a full RDF parser,
+-- some steps (e.g. namespace resolution) resemble those of other RDF
+-- formats, whose processing order is documented in detail.
+-- @see <https://www.w3.org/TR/turtle/#sec-parsing>
+return {
+ {Para = Namespaces},
+ {Inlines = Statements}
+ -- FIXME: add custom declared namespaces in Meta
+ -- TODO: maybe add only actively used namespaces
+ -- (do same as for unused link definitions)
+ --{Meta = NamespacesToMeta},
+}