--- newIffy - object representing a potential enclosure yet undecided
---
--- An "iffy" contains multiple trajectories of elements and chars,
--- with methods to extend and consolidate them.
---
--- Internal state (private, reachable only through the returned methods):
---
---  * `elems` - elements already qualified for output
---  * `elems_unenclosed` - pending elements, read as ordinary content
---  * `elems_enclosed` - pending elements, read as enclosed content
---  * `chars_unenclosed` - pending chars, read as ordinary content
---  * `chars_enclosed` - pending chars, read as enclosed content
---
--- The pending state is resolved by either `disclose()`
--- (keep the unenclosed reading) or `enclose()` (keep the enclosed
--- reading); both drop the reading that lost.
--
-- Methods are plain closures: call them with `.`, not `:`.
--
-- @treturn table method table operating on the private state
local function newIffy()
	local self = {

		-- stacks of qualified and pending unenclosed/enclosed elements
		elems = pandoc.List(),
		elems_unenclosed = pandoc.List(),
		elems_enclosed = pandoc.List(),

		-- strings of pending unenclosed/enclosed chars
		chars_unenclosed = "",
		chars_enclosed = "",
	}

	-- true when at least one element is pending in the enclosed reading
	-- (pending enclosed chars do not count until elementized)
	local has_bracketed_content = function()
		return not TableEmpty(self.elems_enclosed)
	end

	-- true when nothing is qualified or pending as unenclosed
	-- apart from, at most, a single opening brace
	local has_no_non_brace_content = function()
		return (TableEmpty(self.elems)
			and TableEmpty(self.elems_unenclosed)
			and (self.chars_unenclosed == ""
				or self.chars_unenclosed == "{"))
	end

	-- extend the pending unenclosed string
	local append_chars_unenclosed = function(chars)
		self.chars_unenclosed = self.chars_unenclosed .. chars
	end

	-- extend the pending enclosed string
	local append_chars_enclosed = function(chars)
		self.chars_enclosed = self.chars_enclosed .. chars
	end

	-- extend both pending strings
	local append_chars_any = function(chars)
		append_chars_unenclosed(chars)
		append_chars_enclosed(chars)
	end

	-- Append data to the list target:
	-- userdata is inserted as a single element,
	-- a string is wrapped as a Str element,
	-- a table is either one element or a list of elements.
	-- Any other type raises an error.
	local append_elems = function(target, data)
		local kind = type(data)
		if kind == "userdata" then
			target:insert(data)
		elseif kind == "string" then
			target:insert(pandoc.Str(data))
		elseif kind == "table" then

			-- A table tagged with a string in field "t" is one element,
			-- not a list: extending with it would splice or drop it.
			-- NOTE(review): pandoc before 2.17 is understood to represent
			-- AST elements as such tables - confirm supported versions.
			if type(data.t) == "string" then
				target:insert(data)
			else
				target:extend(data)
			end
		else
			error("unsupported data type: " .. kind)
		end
	end

	-- append to the pending unenclosed stack
	local append_elems_unenclosed = function(data)
		append_elems(self.elems_unenclosed, data)
	end

	-- append to the pending enclosed stack
	local append_elems_enclosed = function(data)
		append_elems(self.elems_enclosed, data)
	end

	-- append to both pending stacks
	local append_elems_any = function(data)
		append_elems(self.elems_unenclosed, data)
		append_elems(self.elems_enclosed, data)
	end

	-- move pending unenclosed chars, if any, onto the unenclosed stack
	local elementize_chars_unenclosed = function()
		if self.chars_unenclosed ~= "" then
			append_elems(self.elems_unenclosed, self.chars_unenclosed)
			self.chars_unenclosed = ""
		end
	end

	-- move pending enclosed chars, if any, onto the enclosed stack
	local elementize_chars_enclosed = function()
		if self.chars_enclosed ~= "" then
			append_elems(self.elems_enclosed, self.chars_enclosed)
			self.chars_enclosed = ""
		end
	end

	-- qualify the unenclosed reading and drop the enclosed one
	local disclose = function()
		elementize_chars_unenclosed()
		self.chars_enclosed = ""
		append_elems(self.elems, self.elems_unenclosed)
		self.elems_unenclosed = pandoc.List()
		self.elems_enclosed = pandoc.List()
	end

	-- qualify the enclosed reading and drop the unenclosed one
	local enclose = function()
		elementize_chars_enclosed()
		self.chars_unenclosed = ""
		append_elems(self.elems, self.elems_enclosed)
		self.elems_unenclosed = pandoc.List()
		self.elems_enclosed = pandoc.List()
	end

	-- disqualify any incomplete enclosure, then hand out the result
	local conclude_all_and_return = function()
		disclose()

		return self.elems
	end

	return {
		has_bracketed_content = has_bracketed_content,
		has_no_non_brace_content = has_no_non_brace_content,

		-- historical name, kept for existing callers
		elementize_chars_disclosed = elementize_chars_unenclosed,
		elementize_chars_unenclosed = elementize_chars_unenclosed,
		elementize_chars_enclosed = elementize_chars_enclosed,
		disclose = disclose,
		enclose = enclose,
		append_chars_unenclosed = append_chars_unenclosed,
		append_chars_enclosed = append_chars_enclosed,
		append_chars_any = append_chars_any,
		append_elems_unenclosed = append_elems_unenclosed,
		append_elems_enclosed = append_elems_enclosed,
		append_elems_any = append_elems_any,
		conclude_all_and_return = conclude_all_and_return,
	}
end
elements represent content enclosure @@ -419,23 +527,17 @@ local function Statements (block) if encl == Enclosure.NONE then local _, nextpos, s = str:find("^([^%[{\\]*)") pos = nextpos and nextpos + 1 or pos + 1 - chars_unenclosed = chars_unenclosed..s + iffy.append_chars_unenclosed(s) -- entering bracketed or braced enclosure local t = str:sub(pos, pos) if t == "[" or t == "{" then -- qualify unenclosed elements - if chars_unenclosed:len() > 0 then - elems_unenclosed:insert(pandoc.Str(chars_unenclosed)) - end - elems:extend(elems_unenclosed) - elems_unenclosed = pandoc.List() - elems_enclosed = pandoc.List() + iffy.disclose() pos = pos + 1 - chars_unenclosed = t - chars_enclosed = "" + iffy.append_chars_unenclosed(t) if t == "[" then encl = Enclosure.BRACKETED elseif t == "{" then @@ -450,13 +552,12 @@ local function Statements (block) if encl == Enclosure.BRACKETED then local _, nextpos, s = str:find("^([^%[%]}\\]*)", pos) pos = nextpos and nextpos + 1 or pos + 1 - chars_unenclosed = chars_unenclosed..s - chars_enclosed = chars_enclosed..s + iffy.append_chars_any(s) -- exiting bracketed enclosure if str:sub(pos, pos) == "]" then pos = pos + 1 - chars_unenclosed = chars_unenclosed.."]" + iffy.append_chars_unenclosed("]") encl = Enclosure.BRACKETED_DONE end end @@ -467,21 +568,14 @@ local function Statements (block) -- entering braced enclosure if str:sub(pos, pos) == "{" then pos = pos + 1 - chars_unenclosed = chars_unenclosed.."{" + iffy.append_chars_unenclosed("{") encl = Enclosure.BRACED -- leaving non-annotation enclosure else -- disqualify bracketing not directly followed by brace - elems:extend(elems_unenclosed) - elems_unenclosed = pandoc.List() - elems_enclosed = pandoc.List() - if chars_unenclosed:len() > 0 then - elems:insert(pandoc.Str(chars_unenclosed)) - chars_unenclosed = "" - end - chars_enclosed = "" + iffy.disclose() encl = Enclosure.NONE end @@ -502,25 +596,20 @@ local function Statements (block) local _, nextpos5 = str:find(curie_pattern5, pos) 
local nextpos = nextpos1 or nextpos2 or nextpos3 or nextpos4 or nextpos5 if nextpos then - if chars_enclosed:len() > 0 then - elems_enclosed:insert(pandoc.Str(chars_enclosed)) - end + iffy.elementize_chars_enclosed() local str_inside = str:sub(pos, nextpos) pos = nextpos + 1 local str_remains = str:sub(pos) -- qualify completed bracketed enclosure - if not TableEmpty(elems_enclosed) then - elems:extend(elems_enclosed) + if iffy.has_bracketed_content() then + iffy.enclose() encl = Enclosure.BRACED_DONE block_has_diverged = true -- qualify braced-only enclosure at beginning of block - elseif (TableEmpty(elems) - and TableEmpty(elems_unenclosed) - and (chars_unenclosed:len() == 0 or chars_unenclosed == "{")) - then - elems:extend(elems_enclosed) + elseif iffy.has_no_non_brace_content() then + iffy.enclose() encl = Enclosure.BRACED_DONE block_has_diverged = true @@ -528,40 +617,28 @@ local function Statements (block) elseif is_last_significant_element(block.content, i) and str_remains:len() == 0 then - elems:extend(elems_enclosed) + iffy.enclose() encl = Enclosure.BRACED_DONE block_has_diverged = true -- disqualify stray braced-only enclosure else - chars_unenclosed = chars_unenclosed .. 
str_inside - elems_unenclosed:insert(pandoc.Str(chars_unenclosed)) - elems:extend(elems_unenclosed) + iffy.append_chars_unenclosed(str_inside) + iffy.disclose() encl = Enclosure.NONE end - elems_enclosed = pandoc.List() - elems_unenclosed = pandoc.List() - -- TODO: parse str_remains for another enclosure if str_remains:len() > 0 then - elems_unenclosed:insert(pandoc.Str(str_remains)) + iffy.append_elems_unenclosed(str_remains) encl = Enclosure.NONE end - chars_unenclosed = "" - chars_enclosed = "" end end -- push strings to stacks - if chars_enclosed:len() > 0 then - elems_enclosed:insert(pandoc.Str(chars_enclosed)) - end - if chars_unenclosed:len() > 0 then - elems_unenclosed:insert(pandoc.Str(chars_unenclosed)) - end - chars_unenclosed = "" - chars_enclosed = "" + iffy.elementize_chars_disclosed() + iffy.elementize_chars_enclosed() -- done parsing current Inline element ::continue:: @@ -569,11 +646,7 @@ local function Statements (block) -- return altered stack if it contains complete enclosures if block_has_diverged then - - -- disqualify incomplete enclosure - elems:extend(elems_unenclosed) - - block.content = elems + block.content = iffy.conclude_all_and_return() return block end end -- cgit v1.2.3