diff options
| author | Jonas Smedegaard <dr@jones.dk> | 2025-06-22 16:18:58 +0200 |
|---|---|---|
| committer | Jonas Smedegaard <dr@jones.dk> | 2025-06-22 16:22:26 +0200 |
| commit | f0ce0d95b17785a67d0922213c3af17a21f8e22d (patch) | |
| tree | 48aa7405bd0b27e5f5125339097a9742862a67cb | |
| parent | 3b4b77948252a4c704d5a3c7e42b51f7351e1199 (diff) | |
track enclosure state inside iffy object
| -rw-r--r-- | sem-md/sem-md.lua | 85 |
1 file changed, 48 insertions, 37 deletions
diff --git a/sem-md/sem-md.lua b/sem-md/sem-md.lua index 8b2c8b4..dc8e252 100644 --- a/sem-md/sem-md.lua +++ b/sem-md/sem-md.lua @@ -125,15 +125,12 @@ local SHORTNAME <const> = "sem-md" local PANDOC_IS_OLD <const> = PANDOC_VERSION[1] < 3 or (PANDOC_VERSION[1] == 3 and PANDOC_VERSION[2] < 5) ---- pseudo-enum table to track parser enclosure state ---- @see <https://stackoverflow.com/a/70529481/18619283> -local Enclosure = { - NONE = "0", - BRACKETED = "1", - BRACKETED_DONE = "2", - BRACED = "3", - BRACED_DONE = "4", -} +-- parser enclosure states +local NONE <const> = 0 +local BRACKETED <const> = 1 +local BRACKETED_DONE <const> = 2 +local BRACED <const> = 3 +local BRACED_DONE <const> = 4 -- element types representing content enclosure in Markdown local ElementTypeIsEnclosure = { @@ -211,6 +208,10 @@ end local function newIffy() local self = { + -- flags for enclosing stages + -- TODO: support nested bracket enclosure + state = NONE, + -- block has been mangled and should be returned at end of parsing block_is_mangled = false, @@ -224,6 +225,18 @@ local function newIffy() chars_enclosed = "" } + local set_state = function(newstate) + assert(type(newstate) == "number" + and newstate >= 0 + and newstate <= BRACED_DONE, + "unknown state: " .. 
newstate) + self.state = newstate + end + + local state_is = function(expected) + return self.state == expected + end + local has_bracketed_content = function() return not TableEmpty(self.elems_enclosed) end @@ -329,6 +342,8 @@ local function newIffy() end return { + set_state = set_state, + state_is = state_is, has_bracketed_content = has_bracketed_content, has_no_non_brace_content = has_no_non_brace_content, elementize_chars_disclosed = elementize_chars_disclosed, @@ -468,10 +483,6 @@ local function Statements (block) return end - -- flags for enclosing stages - -- TODO: support nested bracket enclosure - local encl = Enclosure.NONE - -- track potentially enclosed contents local iffy = newIffy() @@ -480,38 +491,38 @@ local function Statements (block) -- non-string element, highest state first to support fall-through if el.t ~= 'Str' then - if encl == Enclosure.BRACED_DONE then + if iffy.state_is(BRACED_DONE) then -- drop space after completed enclosure if el.t ~= "Space" then - encl = Enclosure.NONE + iffy.set_state(NONE) end -- fall through to parse element as unenclosed end - if encl == Enclosure.BRACED then + if iffy.state_is(BRACED) then iffy.append_elems_any(el) goto continue end - if encl == Enclosure.BRACKETED_DONE then + if iffy.state_is(BRACKETED_DONE) then -- bracketing not directly followed by brace iffy.disqualify() - encl = Enclosure.NONE + iffy.set_state(NONE) -- fall through to parse element as unenclosed end - if encl == Enclosure.BRACKETED then + if iffy.state_is(BRACKETED) then iffy.append_elems_any(el) goto continue end - if encl == Enclosure.NONE then + if iffy.state_is(NONE) then -- semantic annotation misparsed as Link -- TODO: limit to solely CURIEs in target @@ -527,7 +538,7 @@ local function Statements (block) -- specific elements represent content enclosure if ElementTypeIsEnclosure[el.t] then - encl = Enclosure.BRACKETED_DONE + iffy.set_state(BRACKETED_DONE) end end @@ -537,13 +548,13 @@ local function Statements (block) local str = 
el.text -- unenclosed immediately after enclosure - if encl == Enclosure.BRACED_DONE then - encl = Enclosure.NONE + if iffy.state_is(BRACED_DONE) then + iffy.set_state(NONE) end -- unenclosed -- TODO: accept backslash except immediately before bracket - if encl == Enclosure.NONE then + if iffy.state_is(NONE) then local _, nextpos, s = str:find("^([^%[{\\]*)") pos = nextpos and nextpos + 1 or pos + 1 iffy.append_chars_unenclosed(s) @@ -555,9 +566,9 @@ local function Statements (block) pos = pos + 1 iffy.append_chars_unenclosed(t) if t == "[" then - encl = Enclosure.BRACKETED + iffy.set_state(BRACKETED) elseif t == "{" then - encl = Enclosure.BRACED + iffy.set_state(BRACED) end end end @@ -565,7 +576,7 @@ local function Statements (block) -- in bracketed enclosure -- TODO: accept backslash except immediately before bracket/brace -- TODO: support nested bracket enclosure - if encl == Enclosure.BRACKETED then + if iffy.state_is(BRACKETED) then local _, nextpos, s = str:find("^([^%[%]}\\]*)", pos) pos = nextpos and nextpos + 1 or pos + 1 iffy.append_chars_any(s) @@ -574,32 +585,32 @@ local function Statements (block) if str:sub(pos, pos) == "]" then pos = pos + 1 iffy.append_chars_unenclosed("]") - encl = Enclosure.BRACKETED_DONE + iffy.set_state(BRACKETED_DONE) end end -- exited bracketed enclosure - if encl == Enclosure.BRACKETED_DONE then + if iffy.state_is(BRACKETED_DONE) then -- entering braced enclosure if str:sub(pos, pos) == "{" then pos = pos + 1 iffy.append_chars_unenclosed("{") - encl = Enclosure.BRACED + iffy.set_state(BRACED) -- leaving non-annotation enclosure else -- bracketing not directly followed by brace iffy.disqualify() - encl = Enclosure.NONE + iffy.set_state(NONE) end end -- in braced enclosure, leaving it -- TODO: support mixed-use enclosure - if encl == Enclosure.BRACED then + if iffy.state_is(BRACED) then local curie_pattern1 = "^"..CURIE_TYPE_PREFIX..CURIE_LONG.."}" local curie_pattern2 = "^"..CURIE_TYPE_PREFIX..CURIE_NO_REF.."}" local 
curie_pattern3 = "^"..CURIE_TYPE_PREFIX..CURIE_LOCAL.."}" @@ -622,14 +633,14 @@ local function Statements (block) iffy.append_elems_enclosed(pandoc.RawInline(SHORTNAME, str_inside)) iffy.qualify_enclosed() - encl = Enclosure.BRACED_DONE + iffy.set_state(BRACED_DONE) -- braced-only enclosure at beginning of block elseif iffy.has_no_non_brace_content() then iffy.append_elems_enclosed(pandoc.RawInline(SHORTNAME, str_inside)) iffy.qualify_enclosed() - encl = Enclosure.BRACED_DONE + iffy.set_state(BRACED_DONE) -- braced-only enclosure at end of block elseif is_last_significant_element(block.content, i) @@ -638,19 +649,19 @@ local function Statements (block) iffy.append_elems_enclosed(pandoc.RawInline(SHORTNAME, str_inside)) iffy.qualify_enclosed() - encl = Enclosure.BRACED_DONE + iffy.set_state(BRACED_DONE) -- stray braced-only enclosure else iffy.append_chars_unenclosed(str_inside .. "}") iffy.disqualify() - encl = Enclosure.NONE + iffy.set_state(NONE) end -- TODO: parse str_remains for another enclosure if str_remains:len() > 0 then iffy.append_elems_unenclosed(str_remains) - encl = Enclosure.NONE + iffy.set_state(NONE) end end end |
