aboutsummaryrefslogtreecommitdiff
path: root/syntax/def.peg
blob: 9f260d775e146e74e5d2a3f51a290f4b77929eae (plain)
  1. # Subsets of Markdown syntax structure
  2. # expressed in parsing expression grammar (PEG) notation.
  3. #
  4. # Copyright 2025, Jonas Smedegaard <dr@jones.dk>
  5. # SPDX-License-Identifier: GPL-3+
  6. # Document
  7. Markdown <- ('---' NEWLINE MetaBlock* '---' NEWLINE)? Block*  # optional '---'-fenced MetaBlock section, then any number of Blocks; NOTE(review): MetaBlock is not defined in the visible grammar — confirm it is provided elsewhere
  8. # Block elements
  9. Block <- LinkDefinition
  10. / Header
  11. / List
  12. / Paragraph  # ordered choice, tried top to bottom; Paragraph is the last resort
  13. Header <- '#'+ SPACE* !NEWLINE Words? SPACE* NEWLINE  # a run of '#', optional Words, closed by NEWLINE
  14. List <- ([-*]+ / [[:digit:]]+ [).]) (_ (List / Words))? NEWLINE  # marker is a run of '-'/'*' or digits plus ')' or '.'; the item body may itself be a List
  15. LinkDefinition <- LinkLabel ':' _? Uri (_ LinkTitle)? NEWLINE  # LinkLabel ':' Uri with an optional LinkTitle
  16. Paragraph <- Words (HardBreak Words)* NEWLINE+  # Words segments joined by HardBreak, closed by one or more NEWLINEs
  17. # Inline elements
  18. Words <- StyledWords
  19. / LinkedWords
  20. / AnnotatedWords
  21. / PlainWords  # PlainWords is the fallback after the marked-up forms
  22. StyledWords <- '**' Words '**'
  23. / '*' Words '*'
  24. / '_' Words '_'  # Words wrapped in '**', '*' or '_'; '**' is tried before '*'
  25. LinkedWords <- '[' Words ']' '(' ((Uri _)? LinkTitle / Uri) ')'
  26. / '[' Words ']' LinkLabel
  27. / LinkLabel  # bracketed Words with a parenthesised target, with a following LinkLabel, or a LinkLabel alone
  28. AnnotatedWords <- '[' Words ']' '{' KeyWords '}'  # bracketed Words followed by KeyWords in braces
  29. LinkLabel <- '[' _? ![\[\]] PlainWords _? ']'  # bracketed PlainWords; the lookahead rejects a leading '[' or ']'
  30. LinkTitle <- '"' _? !'"' _? PlainWords _? '"'
  31. / "'" _? !"'" PlainWords _? "'"
  32. / "(" _? ![()] PlainWords _? ")"  # PlainWords in double quotes, single quotes or parentheses; NOTE(review): only the first form has a second _? after its lookahead — confirm intended
  33. KeyWords <- KeyWord (_ KeyWord)*  # one or more KeyWord separated by _
  34. PlainWords <- PRINTABLES (_ PRINTABLES)*  # one or more PRINTABLES runs separated by _
  35. KeyWord <- Identifier / Class / Attribute / '-'  # a lone '-' is also accepted
  36. Identifier <- '#' [[:alpha:]] [[:alnum:]]*  # '#', then a letter, then any alphanumerics
  37. Class <- '.' [[:alpha:]] [[:alnum:]]*  # '.', then a letter, then any alphanumerics
  38. Attribute <- [[:alpha:]] [[:alnum:]]* SPACE* '=' SPACE*
  39. ('"' [[:alnum:]]+ '"' / [[:alnum:]]+)  # key '=' value; the value is alphanumeric, optionally double-quoted; '=' is a quoted literal
  40. Uri <- '<' ![<>] PRINTABLES? '>'
  41. / ![<>] PRINTABLES  # angle-bracketed (content optional), or bare PRINTABLES not starting with '<' or '>'
  42. HardBreak <- SPACE SPACE+ NEWLINE  # at least two spaces directly before a NEWLINE
  43. _ <- SPACE+
  44. / SPACE? NEWLINE SPACE? SPACE? SPACE?  # a run of spaces, or one NEWLINE with up to one space before and up to three after
  45. # Terminals
  46. PRINTABLES <- [[:graph:]]+  # one or more characters of the [:graph:] class
  47. SPACE <- ' '
  48. NEWLINE <- '\r\n'
  49. / '\n'
  50. / '\r'  # '\r\n' is listed first so a CRLF pair is consumed as one NEWLINE
  51. # Semantic Markdown
  52. # @see <https://source.jones.dk/semantic-markdown/about/>
  53. BlockX <- LinkDefinition
  54. / PrefixDefinition
  55. / Header
  56. / List
  57. / Paragraph  # same alternatives as Block, with PrefixDefinition added in second place
  58. PrefixDefinition <- '{' CuriePrefix '}' ':' _? SemWord NEWLINE  # '{' CuriePrefix '}' ':' then a SemWord, closed by NEWLINE
  59. KeyWordX <- SemWord / Identifier / Class / Attribute / '-'  # the KeyWord alternatives, preceded by SemWord
  60. SemWord <- SEMPREFIX ('<' ![<>] PRINTABLES '>' / Curie)  # SEMPREFIX, then an angle-bracketed PRINTABLES run or a Curie
  61. SEMPREFIX <- [.=]  # '.' or '='
  62. # RDF CURIE
  63. # @see <https://www.w3.org/TR/rdfa-core/#s_curies>
  64. Curie <- (CuriePrefix? ':')? CurieReference  # optional "prefix:" part (the prefix itself may be omitted) before the reference
  65. CuriePrefix <- NCName
  66. CurieReference <- (IriPathAbsolute / IriPathRootless / IriPathEmpty)
  67. ('?' IriQuery)? ('#' IriFragment)?  # one path form, then an optional '?' query and an optional '#' fragment
  68. # RFC3987 - Internationalized Resource Identifiers (IRIs)
  69. # @see <https://tools.ietf.org/html/rfc3987>
  70. IriPathAbsolute <- '/' (IriSegment ('/' IriSegment?)*)?  # '/' optionally followed by slash-separated segments; the group can match at most once, so it is optional rather than repeated
  71. IriPathRootless <- IriSegment ('/' IriSegment?)*  # starts with a non-empty segment, no leading '/'
  72. IriPathEmpty <- !IriPathChar  # consumes nothing; succeeds only when no IriPathChar follows
  73. IriSegment <- IriPathChar+
  74. IriQuery <- (IriPathChar / IriPrivate / '/' / '?')*  # may be empty; allows IriPrivate, '/' and '?' in addition to IriPathChar
  75. IriFragment <- (IriPathChar / '/' / '?')*  # like IriQuery but without IriPrivate
  76. IriPathChar <- IriUnreserved / PctEncoded / SubDelims / ':' / '@'
  77. IriUnreserved <- [A-Za-z] / [0-9] / '-' / '.' / '_' / '~' / UcsChar  # ASCII letters, digits, '-', '.', '_', '~', or a UcsChar
  78. PctEncoded <- '%' [0-9A-Fa-f] [0-9A-Fa-f]  # '%' plus two hex digits; lowercase a-f is accepted because hex digits are case-insensitive in RFC 3986
  79. SubDelims <- '!' / '$' / '&' / "'" / '(' / ')'
  80. / '*' / '+' / ',' / ';' / '='  # eleven single-character punctuation alternatives
  81. UcsChar <- [#xA0-#xD7FF] / [#xF900-#xFDCF] / [#xFDF0-#xFFEF]
  82. / [#x10000-#x1FFFD] / [#x20000-#x2FFFD] / [#x30000-#x3FFFD]
  83. / [#x40000-#x4FFFD] / [#x50000-#x5FFFD] / [#x60000-#x6FFFD]
  84. / [#x70000-#x7FFFD] / [#x80000-#x8FFFD] / [#x90000-#x9FFFD]
  85. / [#xA0000-#xAFFFD] / [#xB0000-#xBFFFD] / [#xC0000-#xCFFFD]
  86. / [#xD0000-#xDFFFD] / [#xE1000-#xEFFFD]  # ranges in #xHEX notation (presumably Unicode code points — confirm the parser dialect supports this form)
  87. IriPrivate <- [#xE000-#xF8FF] / [#xF0000-#xFFFFD] / [#x100000-#x10FFFD]  # further #xHEX ranges, kept separate from UcsChar and used only by IriQuery
  88. # XML NCName
  89. # @see <https://www.w3.org/TR/2009/REC-xml-names-20091208/#NT-NCName>
  90. NCName <- !':' XMLNAMESTARTCHAR (!':' XMLNameChar)*  # an XML Name with ':' excluded at every position, not only the first
  91. # XML Name
  92. # @see <https://www.w3.org/TR/REC-xml/#NT-Name>
  93. XMLName <- XMLNAMESTARTCHAR XMLNameChar*  # one start character followed by any number of name characters
  94. XMLNameChar <- XMLNAMESTARTCHAR / "-" / "." / [0-9]
  95. / [#xB7] / [#x0300-#x036F] / [#x203F-#x2040]  # #xB7 is bracketed: a bare '#' would start a comment and drop the rest of this line
  96. XMLNAMESTARTCHAR <- ":" / "_" / [A-Z] / [a-z]
  97. / [#xC0-#xD6] / [#xD8-#xF6]
  98. / [#xF8-#x2FF] / [#x370-#x37D] / [#x37F-#x1FFF]
  99. / [#x200C-#x200D] / [#x2070-#x218F] / [#x2C00-#x2FEF]
  100. / [#x3001-#xD7FF] / [#xF900-#xFDCF] / [#xFDF0-#xFFFD]
  101. / [#x10000-#xEFFFF]  # characters allowed at the start of an XML Name; includes ':'