aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jonas Smedegaard <dr@jones.dk> 2025-06-22 16:18:58 +0200
committer Jonas Smedegaard <dr@jones.dk> 2025-06-22 16:22:26 +0200
commit f0ce0d95b17785a67d0922213c3af17a21f8e22d (patch)
tree 48aa7405bd0b27e5f5125339097a9742862a67cb
parent 3b4b77948252a4c704d5a3c7e42b51f7351e1199 (diff)
track enclosure state inside iffy object
-rw-r--r-- sem-md/sem-md.lua 85
1 files changed, 48 insertions, 37 deletions
diff --git a/sem-md/sem-md.lua b/sem-md/sem-md.lua
index 8b2c8b4..dc8e252 100644
--- a/sem-md/sem-md.lua
+++ b/sem-md/sem-md.lua
@@ -125,15 +125,12 @@ local SHORTNAME <const> = "sem-md"
local PANDOC_IS_OLD <const> = PANDOC_VERSION[1] < 3
or (PANDOC_VERSION[1] == 3 and PANDOC_VERSION[2] < 5)
---- pseudo-enum table to track parser enclosure state
---- @see <https://stackoverflow.com/a/70529481/18619283>
-local Enclosure = {
- NONE = "0",
- BRACKETED = "1",
- BRACKETED_DONE = "2",
- BRACED = "3",
- BRACED_DONE = "4",
-}
+-- parser enclosure states (contiguous integers: set_state() range-checks 0..BRACED_DONE)
+local NONE <const> = 0 -- unenclosed
+local BRACKETED <const> = 1 -- in bracketed enclosure
+local BRACKETED_DONE <const> = 2 -- exited bracketed enclosure, brace may follow
+local BRACED <const> = 3 -- in braced enclosure
+local BRACED_DONE <const> = 4 -- immediately after completed braced enclosure
-- element types representing content enclosure in Markdown
local ElementTypeIsEnclosure = {
@@ -211,6 +208,10 @@ end
local function newIffy()
local self = {
+ -- flags for enclosing stages
+ -- TODO: support nested bracket enclosure
+ state = NONE,
+
-- block has been mangled and should be returned at end of parsing
block_is_mangled = false,
@@ -224,6 +225,18 @@ local function newIffy()
chars_enclosed = ""
}
+ local set_state = function(newstate) -- validate, then store, the enclosure state
+ assert(type(newstate) == "number"
+ and newstate >= 0
+ and newstate <= BRACED_DONE,
+ "unknown state: " .. tostring(newstate)) -- tostring(): nil/table would make ".." raise first
+ self.state = newstate
+ end
+
+ local state_is = function(candidate) -- true when the tracked state equals candidate
+ return candidate == self.state
+ end
+
local has_bracketed_content = function()
return not TableEmpty(self.elems_enclosed)
end
@@ -329,6 +342,8 @@ local function newIffy()
end
return {
+ set_state = set_state,
+ state_is = state_is,
has_bracketed_content = has_bracketed_content,
has_no_non_brace_content = has_no_non_brace_content,
elementize_chars_disclosed = elementize_chars_disclosed,
@@ -468,10 +483,6 @@ local function Statements (block)
return
end
- -- flags for enclosing stages
- -- TODO: support nested bracket enclosure
- local encl = Enclosure.NONE
-
-- track potentially enclosed contents
local iffy = newIffy()
@@ -480,38 +491,38 @@ local function Statements (block)
-- non-string element, highest state first to support fall-through
if el.t ~= 'Str' then
- if encl == Enclosure.BRACED_DONE then
+ if iffy.state_is(BRACED_DONE) then
-- drop space after completed enclosure
if el.t ~= "Space" then
- encl = Enclosure.NONE
+ iffy.set_state(NONE)
end
-- fall through to parse element as unenclosed
end
- if encl == Enclosure.BRACED then
+ if iffy.state_is(BRACED) then
iffy.append_elems_any(el)
goto continue
end
- if encl == Enclosure.BRACKETED_DONE then
+ if iffy.state_is(BRACKETED_DONE) then
-- bracketing not directly followed by brace
iffy.disqualify()
- encl = Enclosure.NONE
+ iffy.set_state(NONE)
-- fall through to parse element as unenclosed
end
- if encl == Enclosure.BRACKETED then
+ if iffy.state_is(BRACKETED) then
iffy.append_elems_any(el)
goto continue
end
- if encl == Enclosure.NONE then
+ if iffy.state_is(NONE) then
-- semantic annotation misparsed as Link
-- TODO: limit to solely CURIEs in target
@@ -527,7 +538,7 @@ local function Statements (block)
-- specific elements represent content enclosure
if ElementTypeIsEnclosure[el.t] then
- encl = Enclosure.BRACKETED_DONE
+ iffy.set_state(BRACKETED_DONE)
end
end
@@ -537,13 +548,13 @@ local function Statements (block)
local str = el.text
-- unenclosed immediately after enclosure
- if encl == Enclosure.BRACED_DONE then
- encl = Enclosure.NONE
+ if iffy.state_is(BRACED_DONE) then
+ iffy.set_state(NONE)
end
-- unenclosed
-- TODO: accept backslash except immediately before bracket
- if encl == Enclosure.NONE then
+ if iffy.state_is(NONE) then
local _, nextpos, s = str:find("^([^%[{\\]*)")
pos = nextpos and nextpos + 1 or pos + 1
iffy.append_chars_unenclosed(s)
@@ -555,9 +566,9 @@ local function Statements (block)
pos = pos + 1
iffy.append_chars_unenclosed(t)
if t == "[" then
- encl = Enclosure.BRACKETED
+ iffy.set_state(BRACKETED)
elseif t == "{" then
- encl = Enclosure.BRACED
+ iffy.set_state(BRACED)
end
end
end
@@ -565,7 +576,7 @@ local function Statements (block)
-- in bracketed enclosure
-- TODO: accept backslash except immediately before bracket/brace
-- TODO: support nested bracket enclosure
- if encl == Enclosure.BRACKETED then
+ if iffy.state_is(BRACKETED) then
local _, nextpos, s = str:find("^([^%[%]}\\]*)", pos)
pos = nextpos and nextpos + 1 or pos + 1
iffy.append_chars_any(s)
@@ -574,32 +585,32 @@ local function Statements (block)
if str:sub(pos, pos) == "]" then
pos = pos + 1
iffy.append_chars_unenclosed("]")
- encl = Enclosure.BRACKETED_DONE
+ iffy.set_state(BRACKETED_DONE)
end
end
-- exited bracketed enclosure
- if encl == Enclosure.BRACKETED_DONE then
+ if iffy.state_is(BRACKETED_DONE) then
-- entering braced enclosure
if str:sub(pos, pos) == "{" then
pos = pos + 1
iffy.append_chars_unenclosed("{")
- encl = Enclosure.BRACED
+ iffy.set_state(BRACED)
-- leaving non-annotation enclosure
else
-- bracketing not directly followed by brace
iffy.disqualify()
- encl = Enclosure.NONE
+ iffy.set_state(NONE)
end
end
-- in braced enclosure, leaving it
-- TODO: support mixed-use enclosure
- if encl == Enclosure.BRACED then
+ if iffy.state_is(BRACED) then
local curie_pattern1 = "^"..CURIE_TYPE_PREFIX..CURIE_LONG.."}"
local curie_pattern2 = "^"..CURIE_TYPE_PREFIX..CURIE_NO_REF.."}"
local curie_pattern3 = "^"..CURIE_TYPE_PREFIX..CURIE_LOCAL.."}"
@@ -622,14 +633,14 @@ local function Statements (block)
iffy.append_elems_enclosed(pandoc.RawInline(SHORTNAME,
str_inside))
iffy.qualify_enclosed()
- encl = Enclosure.BRACED_DONE
+ iffy.set_state(BRACED_DONE)
-- braced-only enclosure at beginning of block
elseif iffy.has_no_non_brace_content() then
iffy.append_elems_enclosed(pandoc.RawInline(SHORTNAME,
str_inside))
iffy.qualify_enclosed()
- encl = Enclosure.BRACED_DONE
+ iffy.set_state(BRACED_DONE)
-- braced-only enclosure at end of block
elseif is_last_significant_element(block.content, i)
@@ -638,19 +649,19 @@ local function Statements (block)
iffy.append_elems_enclosed(pandoc.RawInline(SHORTNAME,
str_inside))
iffy.qualify_enclosed()
- encl = Enclosure.BRACED_DONE
+ iffy.set_state(BRACED_DONE)
-- stray braced-only enclosure
else
iffy.append_chars_unenclosed(str_inside .. "}")
iffy.disqualify()
- encl = Enclosure.NONE
+ iffy.set_state(NONE)
end
-- TODO: parse str_remains for another enclosure
if str_remains:len() > 0 then
iffy.append_elems_unenclosed(str_remains)
- encl = Enclosure.NONE
+ iffy.set_state(NONE)
end
end
end