aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jonas Smedegaard <dr@jones.dk> 2025-06-21 10:25:26 +0200
committer Jonas Smedegaard <dr@jones.dk> 2025-06-21 10:55:46 +0200
commit 8d6eb3d62147aa632ca085549c8b56c9053249b1 (patch)
tree 15680e439d5f91dbc6fd4e82ec8bbfda9de005b2
parent bd51df7df19986530f839caf57304a97d91743d4 (diff)
refactor to collect iffy elements and chars in object
-rw-r--r-- sem-md/sem-md.lua 221
1 files changed, 147 insertions, 74 deletions
diff --git a/sem-md/sem-md.lua b/sem-md/sem-md.lua
index 8e58662..03efb51 100644
--- a/sem-md/sem-md.lua
+++ b/sem-md/sem-md.lua
@@ -202,6 +202,126 @@ local function is_last_significant_element(list, listpos)
or (listpos == (#list - 1) and list[#list].t == "Space"))
end
+--- newIffy - object representing a potential enclosure yet undecided
+---
+--- An "iffy" holds qualified elements plus pending unenclosed/enclosed
+--- elements and chars; returns closures to extend and consolidate them.
+--
+-- @see <https://appgurueu.github.io/2024/04/07/oop-in-lua.html>
+local function newIffy()
+ local self = {
+
+ -- stacks of qualified and pending unenclosed/enclosed elements
+ elems = pandoc.List(), -- qualified
+ elems_unenclosed = pandoc.List(), -- pending, unenclosed reading
+ elems_enclosed = pandoc.List(), -- pending, enclosed reading
+
+ -- strings of pending unenclosed/enclosed chars
+ chars_unenclosed = "",
+ chars_enclosed = ""
+ }
+ -- report whether elems_enclosed is non-empty (as judged by TableEmpty)
+ local has_bracketed_content = function()
+ return not TableEmpty(self.elems_enclosed)
+ end
+ -- report whether nothing is qualified or pending except at most a lone "{"
+ local has_no_non_brace_content = function()
+ return (TableEmpty(self.elems)
+ and TableEmpty(self.elems_unenclosed)
+ and (self.chars_unenclosed:len() == 0
+ or self.chars_unenclosed == "{"))
+ end
+ -- extend the pending unenclosed string
+ local append_chars_unenclosed = function(chars)
+ self.chars_unenclosed = self.chars_unenclosed .. chars
+ end
+ -- extend the pending enclosed string (internal: not in the returned table)
+ local append_chars_enclosed = function(chars)
+ self.chars_enclosed = self.chars_enclosed .. chars
+ end
+ -- extend both pending strings with the same chars
+ local append_chars_any = function(chars)
+ append_chars_unenclosed(chars)
+ append_chars_enclosed(chars)
+ end
+ -- push data onto target: userdata as-is, string as pandoc.Str, table via extend
+ local append_elems = function(target, data)
+ if type(data) == "userdata" then -- presumably a single pandoc element; verify
+ target:insert(data)
+ elseif type(data) == "string" then
+ target:insert(pandoc.Str(data))
+ elseif type(data) == "table" then -- a list, e.g. the stacks passed by disclose/enclose
+ target:extend(data)
+ else
+ error("unsupported data type: " .. type(data))
+ end
+ end
+ -- add element(s) to the pending unenclosed stack
+ local append_elems_unenclosed = function(data)
+ append_elems(self.elems_unenclosed, data)
+ end
+ -- add element(s) to the pending enclosed stack
+ local append_elems_enclosed = function(data)
+ append_elems(self.elems_enclosed, data)
+ end
+ -- add element(s) to both pending stacks
+ local append_elems_any = function(data)
+ append_elems(self.elems_unenclosed, data)
+ append_elems(self.elems_enclosed, data)
+ end
+ -- move non-empty chars_unenclosed onto elems_unenclosed as a Str, then clear it
+ local elementize_chars_disclosed = function()
+ if self.chars_unenclosed:len() > 0 then
+ append_elems(self.elems_unenclosed, self.chars_unenclosed)
+ self.chars_unenclosed = ""
+ end
+ end
+ -- move non-empty chars_enclosed onto elems_enclosed as a Str, then clear it
+ local elementize_chars_enclosed = function()
+ if self.chars_enclosed:len() > 0 then
+ append_elems(self.elems_enclosed, self.chars_enclosed)
+ self.chars_enclosed = ""
+ end
+ end
+ -- settle on the unenclosed reading: qualify its elements, drop the enclosed one
+ local disclose = function()
+ elementize_chars_disclosed()
+ self.chars_enclosed = "" -- discard chars of the enclosed reading
+ append_elems(self.elems, self.elems_unenclosed)
+ self.elems_unenclosed = pandoc.List()
+ self.elems_enclosed = pandoc.List()
+ end
+ -- settle on the enclosed reading: qualify its elements, drop the unenclosed one
+ local enclose = function()
+ elementize_chars_enclosed()
+ self.chars_unenclosed = "" -- discard chars of the unenclosed reading
+ append_elems(self.elems, self.elems_enclosed)
+ self.elems_unenclosed = pandoc.List()
+ self.elems_enclosed = pandoc.List()
+ end
+ -- disclose whatever is still pending, then return the qualified elements
+ local conclude_all_and_return = function()
+ disclose()
+
+ return self.elems
+ end
+ -- exported methods; state in self is reachable only through these closures
+ return {
+ has_bracketed_content = has_bracketed_content,
+ has_no_non_brace_content = has_no_non_brace_content,
+ elementize_chars_disclosed = elementize_chars_disclosed,
+ elementize_chars_enclosed = elementize_chars_enclosed,
+ disclose = disclose,
+ enclose = enclose,
+ append_chars_unenclosed = append_chars_unenclosed,
+ append_chars_any = append_chars_any,
+ append_elems_unenclosed = append_elems_unenclosed,
+ append_elems_enclosed = append_elems_enclosed,
+ append_elems_any = append_elems_any,
+ conclude_all_and_return = conclude_all_and_return
+ }
+end
+
--- Namespaces - process RDF namespace IRI declarations
---
--- Takes as input a list of Para block elements.
@@ -332,14 +452,8 @@ local function Statements (block)
-- amount of detected statements in this block
local block_has_diverged = false
- -- stacks of qualified and pending unenclosed/enclosed elements
- local elems = pandoc.List()
- local elems_unenclosed = pandoc.List()
- local elems_enclosed = pandoc.List()
-
- -- strings of pending unenclosed/enclosed chars
- local chars_unenclosed = ""
- local chars_enclosed = ""
+ -- track potentially enclosed contents
+ local iffy = newIffy()
for i, el in ipairs(block.content) do
local pos = 1
@@ -357,8 +471,7 @@ local function Statements (block)
end
if encl == Enclosure.BRACED then
- elems_unenclosed:insert(el)
- elems_enclosed:insert(el)
+ iffy.append_elems_any(el)
goto continue
end
@@ -366,17 +479,14 @@ local function Statements (block)
if encl == Enclosure.BRACKETED_DONE then
-- disqualify bracketing not directly followed by brace
- elems:extend(elems_unenclosed)
- elems_unenclosed = pandoc.List()
- elems_enclosed = pandoc.List()
+ iffy.disclose()
encl = Enclosure.NONE
-- fall through to parse element as unenclosed
end
if encl == Enclosure.BRACKETED then
- elems_unenclosed:insert(el)
- elems_enclosed:insert(el)
+ iffy.append_elems_any(el)
goto continue
end
@@ -388,14 +498,12 @@ local function Statements (block)
if el.t == "Link"
and el.target:find("^{.*}$")
then
- elems:extend(elems_unenclosed)
- elems:extend(el.content)
- elems_unenclosed = pandoc.List()
- elems_enclosed = pandoc.List()
+ iffy.append_elems_any(el.content)
+ iffy.disclose()
block_has_diverged = true
else
- elems_unenclosed:insert(el)
+ iffy.append_elems_unenclosed(el)
end
-- specific elements represent content enclosure
@@ -419,23 +527,17 @@ local function Statements (block)
if encl == Enclosure.NONE then
local _, nextpos, s = str:find("^([^%[{\\]*)")
pos = nextpos and nextpos + 1 or pos + 1
- chars_unenclosed = chars_unenclosed..s
+ iffy.append_chars_unenclosed(s)
-- entering bracketed or braced enclosure
local t = str:sub(pos, pos)
if t == "[" or t == "{" then
-- qualify unenclosed elements
- if chars_unenclosed:len() > 0 then
- elems_unenclosed:insert(pandoc.Str(chars_unenclosed))
- end
- elems:extend(elems_unenclosed)
- elems_unenclosed = pandoc.List()
- elems_enclosed = pandoc.List()
+ iffy.disclose()
pos = pos + 1
- chars_unenclosed = t
- chars_enclosed = ""
+ iffy.append_chars_unenclosed(t)
if t == "[" then
encl = Enclosure.BRACKETED
elseif t == "{" then
@@ -450,13 +552,12 @@ local function Statements (block)
if encl == Enclosure.BRACKETED then
local _, nextpos, s = str:find("^([^%[%]}\\]*)", pos)
pos = nextpos and nextpos + 1 or pos + 1
- chars_unenclosed = chars_unenclosed..s
- chars_enclosed = chars_enclosed..s
+ iffy.append_chars_any(s)
-- exiting bracketed enclosure
if str:sub(pos, pos) == "]" then
pos = pos + 1
- chars_unenclosed = chars_unenclosed.."]"
+ iffy.append_chars_unenclosed("]")
encl = Enclosure.BRACKETED_DONE
end
end
@@ -467,21 +568,14 @@ local function Statements (block)
-- entering braced enclosure
if str:sub(pos, pos) == "{" then
pos = pos + 1
- chars_unenclosed = chars_unenclosed.."{"
+ iffy.append_chars_unenclosed("{")
encl = Enclosure.BRACED
-- leaving non-annotation enclosure
else
-- disqualify bracketing not directly followed by brace
- elems:extend(elems_unenclosed)
- elems_unenclosed = pandoc.List()
- elems_enclosed = pandoc.List()
- if chars_unenclosed:len() > 0 then
- elems:insert(pandoc.Str(chars_unenclosed))
- chars_unenclosed = ""
- end
- chars_enclosed = ""
+ iffy.disclose()
encl = Enclosure.NONE
end
@@ -502,25 +596,20 @@ local function Statements (block)
local _, nextpos5 = str:find(curie_pattern5, pos)
local nextpos = nextpos1 or nextpos2 or nextpos3 or nextpos4 or nextpos5
if nextpos then
- if chars_enclosed:len() > 0 then
- elems_enclosed:insert(pandoc.Str(chars_enclosed))
- end
+ iffy.elementize_chars_enclosed()
local str_inside = str:sub(pos, nextpos)
pos = nextpos + 1
local str_remains = str:sub(pos)
-- qualify completed bracketed enclosure
- if not TableEmpty(elems_enclosed) then
- elems:extend(elems_enclosed)
+ if iffy.has_bracketed_content() then
+ iffy.enclose()
encl = Enclosure.BRACED_DONE
block_has_diverged = true
-- qualify braced-only enclosure at beginning of block
- elseif (TableEmpty(elems)
- and TableEmpty(elems_unenclosed)
- and (chars_unenclosed:len() == 0 or chars_unenclosed == "{"))
- then
- elems:extend(elems_enclosed)
+ elseif iffy.has_no_non_brace_content() then
+ iffy.enclose()
encl = Enclosure.BRACED_DONE
block_has_diverged = true
@@ -528,40 +617,28 @@ local function Statements (block)
elseif is_last_significant_element(block.content, i)
and str_remains:len() == 0
then
- elems:extend(elems_enclosed)
+ iffy.enclose()
encl = Enclosure.BRACED_DONE
block_has_diverged = true
-- disqualify stray braced-only enclosure
else
- chars_unenclosed = chars_unenclosed .. str_inside
- elems_unenclosed:insert(pandoc.Str(chars_unenclosed))
- elems:extend(elems_unenclosed)
+ iffy.append_chars_unenclosed(str_inside)
+ iffy.disclose()
encl = Enclosure.NONE
end
- elems_enclosed = pandoc.List()
- elems_unenclosed = pandoc.List()
-
-- TODO: parse str_remains for another enclosure
if str_remains:len() > 0 then
- elems_unenclosed:insert(pandoc.Str(str_remains))
+ iffy.append_elems_unenclosed(str_remains)
encl = Enclosure.NONE
end
- chars_unenclosed = ""
- chars_enclosed = ""
end
end
-- push strings to stacks
- if chars_enclosed:len() > 0 then
- elems_enclosed:insert(pandoc.Str(chars_enclosed))
- end
- if chars_unenclosed:len() > 0 then
- elems_unenclosed:insert(pandoc.Str(chars_unenclosed))
- end
- chars_unenclosed = ""
- chars_enclosed = ""
+ iffy.elementize_chars_disclosed()
+ iffy.elementize_chars_enclosed()
-- done parsing current Inline element
::continue::
@@ -569,11 +646,7 @@ local function Statements (block)
-- return altered stack if it contains complete enclosures
if block_has_diverged then
-
- -- disqualify incomplete enclosure
- elems:extend(elems_unenclosed)
-
- block.content = elems
+ block.content = iffy.conclude_all_and_return()
return block
end
end