aboutsummaryrefslogtreecommitdiff
path: root/syntax/def.peg
blob: 11ce1ab5ddeeac137ac82dbf933dc065bf6e67a5 (plain)
  1. # Subsets of Markdown syntax structure
  2. # expressed in parsing expression grammar (PEG) notation.
  3. #
  4. # Copyright 2025, Jonas Smedegaard <dr@jones.dk>
  5. # SPDX-License-Identifier: GPL-3+
  6. # Document
  # Document rule: an optional metadata section (a '---' line, zero or more
  # MetaBlock, another '---' line) followed by zero or more Block.
  # NOTE(review): MetaBlock is not defined by any rule visible in this file;
  # confirm it is supplied elsewhere or add a definition.
  Markdown <- ('---' NEWLINE MetaBlock* '---' NEWLINE)? Block*
  8. # Block elements
  # A block is the first of these alternatives that matches, tried in the
  # listed order; Paragraph is the fallback.
  Block <- Header
    / List
    / LinkDefinition
    / Paragraph
  # One or more '#', optional spaces, optional Words, optional trailing
  # spaces, then a line ending.
  # NOTE(review): the !NEWLINE guard precedes the optional Words, so a line
  # holding only '#' characters (and spaces) cannot match; confirm that
  # rejecting empty headers is intended.
  Header <- '#'+ SPACE* !NEWLINE Words? SPACE* NEWLINE
  # A list marker: either one or more '-' / '*' characters, or one or more
  # digits followed by ')' or '.'. The marker may be followed by whitespace
  # and then a nested List (tried first) or Words. Ends with a line ending.
  List <- ([-*]+ / [[:digit:]]+ [).]) (_ (List / Words))? NEWLINE
  # A LinkLabel, a ':', optional whitespace and a Uri, optionally followed by
  # whitespace and a LinkTitle, then a line ending.
  LinkDefinition <- LinkLabel ':' _? Uri (_ LinkTitle)? NEWLINE
  # Words, then any number of (HardBreak Words) continuations, terminated by
  # one or more line endings.
  Paragraph <- Words (HardBreak Words)* NEWLINE+
  17. # Inline elements
  # Inline content: the first matching alternative wins. LinkedWords is tried
  # before AnnotatedWords (both can start with '[' Words ']'), and PlainWords
  # is the fallback.
  Words <- StyledWords
    / LinkedWords
    / AnnotatedWords
    / PlainWords
  # Words wrapped in a matching pair of '**', '*' or '_'. The two-character
  # '**' delimiter is tried before the single '*'.
  StyledWords <- '**' Words '**'
    / '*' Words '*'
    / '_' Words '_'
  # Three link forms, in order:
  #   1. '[' Words ']' followed by a parenthesised target, which is either
  #      an optional (Uri whitespace) plus a LinkTitle, or a Uri alone;
  #   2. '[' Words ']' followed by a LinkLabel;
  #   3. a LinkLabel on its own.
  LinkedWords <- '[' Words ']' '(' ((Uri _)? LinkTitle / Uri) ')'
    / '[' Words ']' LinkLabel
    / LinkLabel
  # '[' Words ']' followed by a brace-delimited PlainWords whose first
  # character is neither '{' nor '}'.
  AnnotatedWords <- '[' Words ']' '{' ![{}] PlainWords '}'
  # Square brackets around PlainWords, with optional whitespace padding on
  # either side; the lookahead rejects a bracket as the first content
  # character.
  LinkLabel <- '[' _? ![\[\]] PlainWords _? ']'
  # PlainWords delimited by double quotes, single quotes, or parentheses,
  # with optional whitespace padding; the lookahead rejects the delimiter
  # itself as the first content character.
  # NOTE(review): only the double-quote alternative has a second `_?` after
  # the lookahead; the other two do not. Confirm whether this difference is
  # intended or a typo.
  LinkTitle <- '"' _? !'"' _? PlainWords _? '"'
    / "'" _? !"'" PlainWords _? "'"
    / "(" _? ![()] PlainWords _? ")"
  # One or more PRINTABLES runs separated by `_` whitespace.
  # NOTE(review): PRINTABLES is [[:graph:]]+, a class that also contains
  # delimiter characters such as ']', '"', ')' and '>'. Under standard PEG
  # greedy repetition the run would swallow a closing delimiter; verify how
  # the target parser handles the delimited rules that embed
  # PlainWords/PRINTABLES.
  PlainWords <- PRINTABLES (_ PRINTABLES)*
  # Either an angle-bracketed PRINTABLES run, or a bare PRINTABLES run that
  # does not start with '<' or '>'.
  # NOTE(review): in the bracketed form the ![<>] guard comes before the
  # optional PRINTABLES, so an empty '<>' cannot match; confirm intent.
  Uri <- '<' ![<>] PRINTABLES? '>'
    / ![<>] PRINTABLES
  # Two or more spaces immediately followed by a line ending.
  HardBreak <- SPACE SPACE+ NEWLINE
  # Inline whitespace: one or more spaces, or else a single line ending with
  # at most one space before it and at most three spaces after it.
  _ <- SPACE+
    / SPACE? NEWLINE SPACE? SPACE? SPACE?
  39. # Terminals
  # One or more characters of the POSIX [:graph:] class.
  PRINTABLES <- [[:graph:]]+
  # A single space character (tabs are not included).
  SPACE <- ' '
  # A line ending: CR LF, LF, or CR. The two-character form is listed first
  # so that a CR LF pair is consumed as one line ending.
  NEWLINE <- '\r\n'
    / '\n'
    / '\r'
  45. # Semantic Markdown
  46. # @see <https://source.jones.dk/semantic-markdown/about/>
  # '[' Words ']' followed by a brace-delimited annotation. The annotation is
  # SemWords if that matches, otherwise PlainWords not starting with a brace.
  # NOTE(review): no rule visible in this file references AnnotatedWordsX
  # (Words uses AnnotatedWords); confirm how this rule is meant to be used.
  AnnotatedWordsX <- '[' Words ']'
    ('{' (SemWords / ![{}] PlainWords) '}')
  # One or more SemWord separated by `_` whitespace.
  SemWords <- SemWord (_ SemWord)*
  # A SEMPREFIX followed by either an angle-bracketed PRINTABLES run (not
  # starting with '<' or '>') or a Curie; the bracketed form is tried first.
  SemWord <- SEMPREFIX '<' ![<>] PRINTABLES '>'
    / SEMPREFIX Curie
  # A single '.' or '=' character.
  SEMPREFIX <- [.=]
  53. # RDF CURIE
  54. # @see <https://www.w3.org/TR/rdfa-core/#s_curies>
  # An optional "prefix part" (an optional CuriePrefix followed by ':'),
  # then a CurieReference.
  Curie <- (CuriePrefix? ':')? CurieReference
  # The prefix is an NCName.
  CuriePrefix <- NCName
  # A path (absolute, rootless, or empty, tried in that order), optionally
  # followed by '?' and a query, and optionally by '#' and a fragment.
  CurieReference <- (IriPathAbsolute / IriPathRootless / IriPathEmpty)
    ('?' IriQuery)? ('#' IriFragment)?
  59. # RFC3987 - Internationalized Resource Identifiers (IRIs)
  60. # @see <https://tools.ietf.org/html/rfc3987>
  # Absolute path: a '/' optionally followed by a non-empty first segment and
  # any number of further '/'-separated segments (which may be empty).
  # Mirrors RFC 3987: ipath-absolute = "/" [ isegment-nz *( "/" isegment ) ].
  # The trailing group is optional rather than repeated. Once it has matched,
  # the remaining input cannot start with another IriSegment, so a repetition
  # could never iterate more than once.
  IriPathAbsolute <- '/' (IriSegment ('/' IriSegment?)*)?
  # Rootless path: a non-empty first segment followed by any number of
  # '/'-separated segments (which may be empty).
  IriPathRootless <- IriSegment ('/' IriSegment?)*
  # Empty path: consumes nothing and succeeds only when the next character
  # is not an IriPathChar.
  IriPathEmpty <- !IriPathChar
  # A non-empty run of path characters.
  IriSegment <- IriPathChar+
  # Zero or more path characters, private-use characters, '/' or '?'.
  IriQuery <- (IriPathChar / IriPrivate / '/' / '?')*
  # Zero or more path characters, '/' or '?' (no private-use characters).
  IriFragment <- (IriPathChar / '/' / '?')*
  # One path character: unreserved, percent-encoded, sub-delimiter, ':' or '@'.
  IriPathChar <- IriUnreserved / PctEncoded / SubDelims / ':' / '@'
  # ASCII letters and digits, '-', '.', '_', '~', or a UcsChar.
  IriUnreserved <- [A-Za-z] / [0-9] / '-' / '.' / '_' / '~' / UcsChar
  # A percent-encoded octet: '%' followed by two hexadecimal digits.
  # Both upper- and lowercase hex digits are accepted. RFC 3986 section 2.1
  # treats 'a'-'f' as equivalent to 'A'-'F', so an escape such as "%c3%a9"
  # must match too.
  PctEncoded <- '%' [0-9A-Fa-f] [0-9A-Fa-f]
  # One of the eleven listed punctuation characters.
  SubDelims <- '!' / '$' / '&' / "'" / '(' / ')'
    / '*' / '+' / ',' / ';' / '='
  # A single code point from one of the listed ranges: three ranges within
  # U+00A0..U+FFEF, followed by one range per plane from U+10000 up to
  # U+EFFFD (the last one starting at U+E1000).
  UcsChar <- [#xA0-#xD7FF] / [#xF900-#xFDCF] / [#xFDF0-#xFFEF]
    / [#x10000-#x1FFFD] / [#x20000-#x2FFFD] / [#x30000-#x3FFFD]
    / [#x40000-#x4FFFD] / [#x50000-#x5FFFD] / [#x60000-#x6FFFD]
    / [#x70000-#x7FFFD] / [#x80000-#x8FFFD] / [#x90000-#x9FFFD]
    / [#xA0000-#xAFFFD] / [#xB0000-#xBFFFD] / [#xC0000-#xCFFFD]
    / [#xD0000-#xDFFFD] / [#xE1000-#xEFFFD]
  # A single code point from U+E000..U+F8FF, U+F0000..U+FFFFD or
  # U+100000..U+10FFFD; referenced only by IriQuery in this file.
  IriPrivate <- [#xE000-#xF8FF] / [#xF0000-#xFFFFD] / [#x100000-#x10FFFD]
  79. # XML NCName
  80. # @see <https://www.w3.org/TR/2009/REC-xml-names-20091208/#NT-NCName>
  # An XML name that contains no colon anywhere: a name-start character
  # followed by any number of name characters, each guarded against ':'.
  # Guarding only the first character is not enough. ':' is itself a name
  # character, so the name would swallow "prefix:local" whole and Curie
  # could never split off the prefix.
  NCName <- (!':' XMLNAMESTARTCHAR) (!':' XMLNameChar)*
  82. # XML Name
  83. # @see <https://www.w3.org/TR/REC-xml/#NT-Name>
  # An XML name: one name-start character followed by any number of name
  # characters. The references use the rule names actually defined below
  # (XMLNAMESTARTCHAR, XMLNameChar); NameStartChar and NameChar have no
  # definition in this grammar.
  XMLName <- XMLNAMESTARTCHAR XMLNameChar*
  # A name character: any name-start character, '-', '.', an ASCII digit,
  # U+00B7, or a code point in U+0300..U+036F or U+203F..U+2040.
  # U+00B7 is written as a bracketed class: a bare #xB7 begins a '#' comment
  # and would silently drop the rest of the alternative list on that line.
  XMLNameChar <- XMLNAMESTARTCHAR / "-" / "." / [0-9]
    / [#xB7] / [#x0300-#x036F] / [#x203F-#x2040]
  # A name-start character: ':', '_', an ASCII letter, or a code point in
  # one of the listed ranges.
  XMLNAMESTARTCHAR <- ":" / "_" / [A-Z] / [a-z]
    / [#xC0-#xD6] / [#xD8-#xF6]
    / [#xF8-#x2FF] / [#x370-#x37D] / [#x37F-#x1FFF]
    / [#x200C-#x200D] / [#x2070-#x218F] / [#x2C00-#x2FEF]
    / [#x3001-#xD7FF] / [#xF900-#xFDCF] / [#xFDF0-#xFFFD]
    / [#x10000-#xEFFFF]