aboutsummaryrefslogtreecommitdiff
path: root/_extensions/ruc-play/semantic-markdown/semantic-markdown.lua
blob: 4c8b5e786f60b794de296a892a0abfc2793f8889 (plain)
  1. --- semantic-markdown - Pandoc plugin to process semantic hints
  2. ---
  3. --- SPDX-FileCopyrightText: 2025 Jonas Smedegaard <dr@jones.dk>
  4. --- SPDX-License-Identifier: GPL-3.0-or-later
  5. ---
  6. --- ## Examples
  7. ---
  8. --- Ideally, this text:
  9. ---
  10. --- ```Markdown+RDF
  11. --- Simple ontological annotation:
  12. --- [This]{foaf:depiction} is not a pipe.
  13. ---
  14. --- Nested, mixed-use and custom-namespaced annotations:
  15. --- [[Ceci]{foaf:depiction} n'est pas une pipe.]{lang=fr bibo:Quote}
  16. ---
  17. --- {bibo}: http://purl.org/ontology/bibo/
  18. --- ```
  19. ---
  20. --- ...should with this filter be transformed to this text:
  21. ---
  22. --- ```Markdown
  23. --- ---
  24. --- turtle: |
  25. --- @prefix bibo: http://purl.org/ontology/bibo/
  26. ---
  27. --- _:001 a foaf:depiction .
  28. --- _:002 a foaf:depiction .
  29. --- _:003 a bibo:Quote .
  30. --- ---
  31. --- Simple ontological annotation:
  32. --- This is not a pipe.
  33. ---
  34. --- Nested, mixed-use and custom-namespaced annotations:
  35. --- [Ceci n'est pas une pipe.]{lang=fr}
  36. --- ```
  37. ---
  38. --- When target document format is html,
  39. --- this filter should ideally produce RDFa 1.1 Lite or Core data.
  40. --- (Lite is *not* a subset of Core as it deviates slightly).
  41. ---
  42. --- * v0.0.1
  43. --- * initial release
  44. ---
  45. --- @version 0.0.1
  46. --- @see <https://source.jones.dk/semantic-markdown/about/>
  47. --- @see <https://moodle.ruc.dk/course/view.php?id=23505>
  48. --- @see <https://www.w3.org/TR/rdfa-primer/#using-rdfa>
  49. --- @see <https://www.ctrl.blog/entry/rdfa-link-attributes.html>
  50. -- TODO: maybe use topdown traversal
  51. -- * order of declaring annotations might matter (but should not)
  52. -- * might enable simpler functions and/or faster processing
  53. -- @see <https://pandoc.org/lua-filters.html#topdown-traversal>
  54. -- ensure stable character classes independent of system locale
  55. -- @see <https://pandoc.org/lua-filters.html#common-pitfalls>
  56. os.setlocale 'C'
  57. -- TODO: cover non-ASCII Unicode characters
  58. -- @see <https://www.lua.org/manual/5.4/manual.html#6.5>
  59. --- Curie_long - CURIE with prefix and reference as set of chars
  60. --- @see <https://www.w3.org/TR/rdfa-core/#h-s_curies>
  61. local _name_start_char = "A-Z_a-z"
  62. local _name_char = _name_start_char.."-0-9"
  63. local _reference = "[".._name_start_char.."][".._name_char.."]*"
  64. local _prefix = "[".._name_start_char.."_-][".._name_char.."]*"
  65. local curie_long = _prefix..":".._reference
  66. --- curie_no_ref - CURIE with only prefix as set of chars
  67. local curie_no_ref = _prefix..":"
  68. --- curie_local - CURIE with only name as set of chars
  69. local curie_local = ":".._reference
  70. --- curie_default - CURIE without prefix or name as char
  71. local curie_default = ":"
  72. -- TODO: curie_re - CURIE as `LPeg.re` regex object
  73. -- TODO: test and replace above curie* patterns
  74. -- @see <https://pandoc.org/lua-filters.html#global-variables>
  75. --local curie_re = re.compile("(".._prefix..")?:(".._reference..")?")
  76. -- FIXME: define RDF context same as RDFa
  77. -- TODO: maybe support overriding context with a JSON-LD URI
  78. -- @see <https://www.w3.org/2011/rdfa-context/rdfa-1.1>
  79. --- Namespaces - process RDF namespace IRI declarations
  80. ---
  81. --- Takes as input a list of Para block elements.
  82. --- For each block matching the pattern for a namespace IRI definition,
  83. --- the declared namespace is extracted.
  84. --- Returns an empty paragraph in case of a match,
  85. --- or nothing (to signal preservation of original content).
  86. ---
  87. --- Example:
  88. ---
  89. --- ```Markdown
  90. --- # Annotated paragraph using a custom namespace
  91. ---
  92. --- My favorite animal is the [Liger]{ov:preferredAnimal}.
  93. ---
  94. --- {ov}: http://open.vocab.org/terms/
  95. --- ```
  96. ---
  97. --- @param blocks Markdown with ontological annotations as Blocks
  98. --- @returns Markdown without ontological annotations as Blocks
  99. --- @see <https://pandoc.org/lua-filters.html#type-blocks>
  100. --- @see <https://www.w3.org/TR/rdf12-concepts/#vocabularies>
  101. local function Namespaces(blocks)
  102. if #blocks.content == 3
  103. and blocks.content[1].t == "Str"
  104. and blocks.content[2].t == "Space"
  105. and blocks.content[3].t == "Str"
  106. and blocks.content[1].text:match "^{"..curie_no_ref.."}%:%:$"
  107. and blocks.content[3].text:match "^https?:"
  108. then
  109. -- FIXME: register namespace in Meta
  110. return pandoc.Blocks {}
  111. end
  112. end
  113. --- Statements - process inline RDF statements
  114. ---
  115. --- Track enclosure changes and process relevant embedded statements
  116. --- for each Inlines object in a Pandoc Abstract Syntax Tree (AST).
  117. ---
  118. --- ```ASCII-art
  119. --- Simple ontological annotation:
  120. --- "A [map]{foaf:depiction} is not the territory"
  121. --- | || |
  122. --- | |`brace_begin `brace_end
  123. --- | `bracket_end
  124. --- `bracket_begin
  125. ---
  126. --- Nested and mixed-use annotations:
  127. --- ["[Ceci]{foaf:depiction} n'est pas une pipe"]{lang=fr dc:Text}
  128. --- | | || | || |
  129. --- | | |`brace_begin `brace_end |`brace_begin |
  130. --- | | `inner_bracket_end | brace_end
  131. --- | `inner_bracket_begin `outer_bracket_end
  132. --- `outer_bracket_end
  133. --- ```
  134. ---
  135. --- @param inlines Markdown with semantic annotations as Inlines
  136. --- @returns Markdown stripped of semantic annotations as Inlines
  137. --- @see <https://pandoc.org/lua-filters.html#type-inline>
  138. function Statements (blocks)
  139. -- positions of enclosure markers
  140. local pos_bracket_begin, pos_brace_begin, pos_brace_end
  141. -- positions of beginning of enclosed content
  142. -- usable as flags, since it is always preceded by a marker
  143. local pos_bracketed, pos_braced
  144. -- amount of detected statements in this block
  145. local statement_count = 0
  146. local new_inlines = {}
  147. for i, el in ipairs(blocks.content) do
  148. -- only string inlines can alter state
  149. if el.t ~= 'Str' then
  150. table.insert(new_inlines, el)
  151. goto continue
  152. end
  153. -- unenclosed
  154. if not (pos_bracketed or pos_braced) then
  155. _, pos_bracket_begin = string.find(el.text, "%[")
  156. if pos_bracket_begin then
  157. pos_bracketed = pos_bracket_begin + 1
  158. end
  159. end
  160. -- enters a bracket enclosure
  161. -- TODO: maybe support nested bracket enclosure
  162. if pos_bracketed and not pos_braced then
  163. _, pos_brace_begin, s = string.find(el.text, "^([^%[%]}]*)%]{",
  164. pos_bracketed)
  165. if pos_brace_begin then
  166. pos_braced = pos_brace_begin + 1
  167. table.insert(new_inlines, pandoc.Str(s))
  168. end
  169. end
  170. -- (ignore space-delimited enclosures: not in spec for inlines)
  171. -- completes a brace enclosure
  172. -- TODO: support mixed-use enclosure
  173. -- TODO: cover curie_prefix and curie_local and curie_default
  174. if pos_braced then
  175. _, pos_brace_end = string.find(el.text, "^"..curie_long.."}",
  176. pos_braced)
  177. if pos_brace_end then
  178. statement_count = statement_count + 1
  179. -- TODO: call same function with remains of Str
  180. end
  181. end
  182. ::continue::
  183. end
  184. -- FIXME
  185. -- if statement_count then
  186. -- return pandoc.Inlines {new_inlines}
  187. -- end
  188. end
  189. -- First resolve namespace declarations, then statements.
  190. --
  191. -- Although this filter is *not* a full RDF parser,
  192. -- order matters for the parts we do handle --
  193. -- e.g. namespace resolving is similar to other RDF formats
  194. -- with detailed documented process ordering.
  195. --
  196. -- @see <https://www.w3.org/TR/turtle/#sec-parsing>
  197. local meta = {}
  198. return {
  199. -- move aside MetaBlocks to speed up processing content
  200. --
  201. -- @see <https://stackoverflow.com/a/47356252/18619283>
  202. { Meta = function(m) meta = m; return {} end },
  203. {Para = Namespaces},
  204. {Block = Statements},
  205. -- FIXME: add custom declared namespaces in Meta
  206. -- TODO: maybe add only actively used namespaces
  207. -- (do same as for unused link definitions)
  208. { Meta = function(_) return meta; end },
  209. --{ Meta = function(_) return NamespacesToMeta(meta); end },
  210. }