aboutsummaryrefslogtreecommitdiff
path: root/syntax/def.peg
blob: 30157d2c9b91f204ccdafbc17b5130f48ff2fe29 (plain)
  1. # Subsets of Markdown syntax structure
  2. # expressed in parsing expression grammar (PEG) notation.
  3. #
  4. # Copyright 2025, Jonas Smedegaard <dr@jones.dk>
  5. # SPDX-License-Identifier: GPL-3+
  6. # Document
  # Whole document: an optional section fenced by two '---' lines and
  # holding zero or more MetaBlock items, then zero or more Block items.
  # NOTE(review): MetaBlock has no definition among the rules visible
  # here - confirm it is supplied elsewhere.
  Markdown <- ( '---' NEWLINE MetaBlock* '---' NEWLINE )?
              Block*
  8. # Block elements
  # A block is the first of these alternatives that matches, tried in
  # the order written.
  Block <- Header / List / LinkDefinition / Paragraph
  # Heading line: one or more '#', optional spaces, optional Words,
  # optional trailing spaces, then the line end.  The !NEWLINE
  # lookahead rejects a line end directly after the marker and spaces.
  Header <- '#'+ SPACE*
            !NEWLINE Words?
            SPACE* NEWLINE
  # List item line: a marker, optionally followed by whitespace and
  # either a further List or Words, then the line end.
  # The marker is one or more '-' or '*' characters, or one or more
  # digits followed by ')' or '.'.
  List <- ( [-*]+ / [[:digit:]]+ [).] )
          ( _ ( List / Words ) )?
          NEWLINE
  # Link definition line: a LinkLabel, a colon, optional whitespace,
  # a Uri, an optional whitespace-separated LinkTitle, and the line end.
  LinkDefinition <- LinkLabel ':' _? Uri
                    ( _ LinkTitle )?
                    NEWLINE
  # Paragraph: Words, optionally continued by further Words after each
  # HardBreak, closed by one or more line ends.
  Paragraph <- Words ( HardBreak Words )* NEWLINE+
  17. # Inline elements
  # Inline content; alternatives are tried in the order written, with
  # PlainWords as the last resort.
  Words <- StyledWords / LinkedWords / AnnotatedWords / PlainWords
  # Words wrapped in matching delimiters: '**', '*' or '_'.
  # The two-character '**' form is tried before the single '*' form.
  StyledWords <- '**' Words '**' / '*' Words '*' / '_' Words '_'
  # Three link forms, tried in order:
  #   1. bracketed Words followed by a parenthesised target, which is
  #      either a LinkTitle optionally preceded by a Uri and whitespace,
  #      or a bare Uri;
  #   2. bracketed Words followed by a LinkLabel;
  #   3. a LinkLabel on its own.
  LinkedWords <- '[' Words ']' '(' ( ( Uri _ )? LinkTitle / Uri ) ')'
               / '[' Words ']' LinkLabel
               / LinkLabel
  # Bracketed Words followed by a brace-enclosed PlainWords annotation.
  # The ![{}] lookahead stops the annotation from starting with a brace.
  AnnotatedWords <- '[' Words ']'
                    '{' ![{}] PlainWords '}'
  # Bracketed PlainWords with optional whitespace padding on both sides;
  # the lookahead forbids a bracket as the first character of the text.
  LinkLabel <- '[' _?
               ![\[\]] PlainWords
               _? ']'
  # Title text in double quotes, single quotes, or parentheses.  In each
  # form a lookahead keeps the text from starting with the closing
  # delimiter (or, for parentheses, with either parenthesis).
  # NOTE(review): only the double-quoted form has a second optional
  # whitespace after its lookahead - confirm whether that difference
  # from the other two forms is intended.
  LinkTitle <- '"' _? !'"' _? PlainWords _? '"'
             / "'" _? !"'" PlainWords _? "'"
             / "(" _? ![()] PlainWords _? ")"
  # One or more PRINTABLES runs, each separated from the previous one
  # by whitespace.
  PlainWords <- PRINTABLES ( _ PRINTABLES )*
  # Either an angle-bracketed target or a bare PRINTABLES run; in both
  # forms a lookahead rejects '<' or '>' as the first content character.
  Uri <- '<' ![<>] PRINTABLES? '>' / ![<>] PRINTABLES
  # Two or more spaces directly before a line end.
  HardBreak <- SPACE SPACE+
               NEWLINE
  # Inline whitespace: a run of spaces, or a single line end that may be
  # preceded by one space and followed by up to three spaces.
  # NOTE(review): the alternatives are ordered, so a leading space is
  # always taken by the first alternative - confirm the 'SPACE?' before
  # NEWLINE can ever apply.
  _ <- SPACE+ / SPACE? NEWLINE SPACE? SPACE? SPACE?
  39. # Terminals
  # One or more characters matching the [:graph:] class.
  PRINTABLES <- [[:graph:]]+
  # Exactly one space character.
  SPACE <- ' '
  # A line end: CR LF, LF alone, or CR alone.  The two-character form is
  # tried first so that CR LF is consumed as a single line end.
  NEWLINE <- '\r\n' / '\n' / '\r'
  45. # Semantic Markdown
  46. # @see <https://source.jones.dk/semantic-markdown/about/>
  # Extended annotation: bracketed Words followed by a brace-enclosed
  # body that is SemWords when that matches, otherwise PlainWords not
  # starting with a brace.
  AnnotatedWordsX <- '[' Words ']' ( '{' ( SemWords / ![{}] PlainWords ) '}' )
  # A SEMPREFIX followed by either an angle-bracketed PRINTABLES run
  # (whose first character is not '<' or '>') or a Curie.
  SemWords <- SEMPREFIX '<' ![<>] PRINTABLES '>' / SEMPREFIX Curie
  # A single '.' or '=' character.
  SEMPREFIX <- [.=]
  52. # RDF CURIE
  53. # @see <https://www.w3.org/TR/rdfa-core/#s_curies>
  # A CurieReference, optionally preceded by a colon that may itself be
  # preceded by a CuriePrefix.
  Curie <- ( CuriePrefix? ':' )?
           CurieReference
  # The prefix part of a Curie is an NCName.
  CuriePrefix <- NCName
  # One of the three path forms, then an optional '?' query and an
  # optional '#' fragment.
  CurieReference <- ( IriPathAbsolute / IriPathRootless / IriPathEmpty )
                    ( '?' IriQuery )?
                    ( '#' IriFragment )?
  58. # RFC3987 - Internationalized Resource Identifiers (IRIs)
  59. # @see <https://tools.ietf.org/html/rfc3987>
  # Absolute path: a '/' optionally followed by a non-empty first
  # segment and any number of '/'-separated segments, which may be empty.
  # The group after the leading '/' is optional ('?'), mirroring
  # RFC 3987  ipath-absolute = "/" [ isegment-nz *( "/" isegment ) ];
  # repeating the group adds nothing, since its inner loop already
  # consumes every following segment.
  IriPathAbsolute <- '/' ( IriSegment ( '/' IriSegment? )* )?
  # Rootless path: a non-empty first segment, then any number of
  # '/'-separated segments, each of which may be empty.
  IriPathRootless <- IriSegment
                     ( '/' IriSegment? )*
  # Empty path: consumes nothing and succeeds only where no IriPathChar
  # follows.
  IriPathEmpty <- !IriPathChar
  # A non-empty run of path characters.
  IriSegment <- IriPathChar+
  # Query part: zero or more path characters, private-use characters,
  # '/' or '?'.
  IriQuery <- ( IriPathChar
              / IriPrivate
              / '/'
              / '?'
              )*
  # Fragment part: zero or more path characters, '/' or '?'.
  IriFragment <- ( IriPathChar
                 / '/'
                 / '?'
                 )*
  # One path character: unreserved, percent-encoded, a sub-delimiter,
  # ':' or '@'.
  IriPathChar <- IriUnreserved
               / PctEncoded
               / SubDelims
               / ':'
               / '@'
  # One unreserved character: an ASCII letter or digit, one of
  # '-' '.' '_' '~', or a UcsChar.
  IriUnreserved <- [A-Za-z] / [0-9]
                 / '-' / '.' / '_' / '~'
                 / UcsChar
  # Percent-encoded octet: '%' followed by two hexadecimal digits.
  # Both letter cases are accepted because RFC 3986 section 2.1 treats
  # upper- and lowercase hex digits in an escape as equivalent.
  PctEncoded <- '%' [0-9A-Fa-f] [0-9A-Fa-f]
  # One sub-delimiter character out of the eleven listed.
  SubDelims <- '!' / '$' / '&' / "'" / '(' / ')' / '*' / '+' / ',' / ';' / '='
  # One character from any of the listed ranges, written in #xHEX
  # notation; one range per line, in the original order.
  UcsChar <- [#xA0-#xD7FF]
           / [#xF900-#xFDCF]
           / [#xFDF0-#xFFEF]
           / [#x10000-#x1FFFD]
           / [#x20000-#x2FFFD]
           / [#x30000-#x3FFFD]
           / [#x40000-#x4FFFD]
           / [#x50000-#x5FFFD]
           / [#x60000-#x6FFFD]
           / [#x70000-#x7FFFD]
           / [#x80000-#x8FFFD]
           / [#x90000-#x9FFFD]
           / [#xA0000-#xAFFFD]
           / [#xB0000-#xBFFFD]
           / [#xC0000-#xCFFFD]
           / [#xD0000-#xDFFFD]
           / [#xE1000-#xEFFFD]
  # One character from any of the three listed ranges, written in #xHEX
  # notation; used by IriQuery only.
  IriPrivate <- [#xE000-#xF8FF]
              / [#xF0000-#xFFFFD]
              / [#x100000-#x10FFFD]
  # XML NCName
  # @see <https://www.w3.org/TR/2009/REC-xml-names-20091208/#NT-NCName>
  # An NCName is an XML Name that contains no colon at all, so ':' is
  # excluded at every position, not only the first.  Without this a
  # prefix would swallow "prefix:local" whole and the ':' expected by
  # Curie could never match.
  NCName <- !':' XMLNAMESTARTCHAR ( !':' XMLNameChar )*
  # XML Name
  # @see <https://www.w3.org/TR/REC-xml/#NT-Name>
  # A start character followed by any number of name characters.
  XMLName <- XMLNAMESTARTCHAR XMLNameChar*
  # Any start character, plus '-', '.', ASCII digits and the listed
  # extra code points.  The single code point B7 is written as a
  # one-element class so that its leading '#' is not read as the start
  # of a comment.
  XMLNameChar <- XMLNAMESTARTCHAR / "-" / "." / [0-9]
      / [#xB7] / [#x0300-#x036F] / [#x203F-#x2040]
  # Characters allowed at the start of an XML Name: ':', '_', ASCII
  # letters and the listed ranges.
  XMLNAMESTARTCHAR <- ":" / "_" / [A-Z] / [a-z]
      / [#xC0-#xD6] / [#xD8-#xF6]
      / [#xF8-#x2FF] / [#x370-#x37D] / [#x37F-#x1FFF]
      / [#x200C-#x200D] / [#x2070-#x218F] / [#x2C00-#x2FEF]
      / [#x3001-#xD7FF] / [#xF900-#xFDCF] / [#xFDF0-#xFFFD]
      / [#x10000-#xEFFFF]