# Subsets of Markdown syntax structure
# expressed in parsing expression grammar (PEG) notation.
#
# Copyright 2025, Jonas Smedegaard
# SPDX-License-Identifier: GPL-3+

# Document
# An optional leading section fenced by '---' lines, followed by blocks.
# NOTE(review): MetaBlock is referenced here but not defined in the part
# of the grammar visible in this file - confirm where it is declared.
Markdown <- ('---' NEWLINE MetaBlock* '---' NEWLINE)? Block*

# Block elements
# Alternatives are tried in order, so Paragraph acts as the fallback.
Block <- LinkDefinition / Header / List / Paragraph
# One or more '#', then optional words, terminated by a newline.
Header <- '#'+ SPACE* !NEWLINE Words? SPACE* NEWLINE
# Marker is a run of '-' or '*', or digits followed by ')' or '.';
# the item body may itself start a nested List.
List <- ([-*]+ / [[:digit:]]+ [).]) (_ (List / Words))? NEWLINE
LinkDefinition <- LinkLabel ':' _? Uri (_ LinkTitle)? NEWLINE
# Lines joined by hard breaks, closed by one or more newlines.
Paragraph <- Words (HardBreak Words)* NEWLINE+

# Inline elements
Words <- StyledWords / LinkedWords / AnnotatedWords / PlainWords
StyledWords <- '**' Words '**' / '*' Words '*' / '_' Words '_'
# Inline link, reference link with explicit label, or bare label.
LinkedWords <- '[' Words ']' '(' ((Uri _)? LinkTitle / Uri) ')'
             / '[' Words ']' LinkLabel
             / LinkLabel
# Bracketed words followed by a brace-delimited attribute list.
AnnotatedWords <- '[' Words ']' '{' KeyWords '}'
LinkLabel <- '[' _? ![\[\]] PlainWords _? ']'
# Title delimited by double quotes, single quotes or parentheses.
LinkTitle <- '"' _? !'"' _? PlainWords _? '"'
           / "'" _? !"'" PlainWords _? "'"
           / "(" _? ![()] PlainWords _? ")"
KeyWords <- KeyWord (_ KeyWord)*
PlainWords <- PRINTABLES (_ PRINTABLES)*
KeyWord <- Identifier / Class / Attribute / '-'
Identifier <- '#' [[:alpha:]] [[:alnum:]]*
Class <- '.' [[:alpha:]] [[:alnum:]]*
# key=value, value optionally double-quoted.
# The '=' separator is a quoted literal like every other literal here;
# a bare = is not a valid PEG expression.
Attribute <- [[:alpha:]] [[:alnum:]]* SPACE* '=' SPACE*
             ('"' [[:alnum:]]+ '"' / [[:alnum:]]+)
# Angle-bracketed or bare.
Uri <- '<' ![<>] PRINTABLES? '>' / ![<>] PRINTABLES
# Two or more spaces directly before a newline.
HardBreak <- SPACE SPACE+ NEWLINE
# Inline whitespace: a run of spaces, or a single newline with at most
# one space before it and at most three spaces after it.
_ <- SPACE+ / SPACE? NEWLINE SPACE? SPACE? SPACE?

# Terminals
PRINTABLES <- [[:graph:]]+
SPACE <- ' '
NEWLINE <- '\r\n' / '\n' / '\r'

# Semantic Markdown
# @see (reference link missing in this copy - TODO restore)
# BlockX and KeyWordX mirror Block and KeyWord with one extra
# alternative each (PrefixDefinition and SemWord respectively).
BlockX <- LinkDefinition / PrefixDefinition / Header / List / Paragraph
PrefixDefinition <- '{' CuriePrefix '}' ':' _? SemWord NEWLINE
KeyWordX <- SemWord / Identifier / Class / Attribute / '-'
# A '.' or '=' sigil followed by an angle-bracketed token or a CURIE.
SemWord <- SEMPREFIX ('<' ![<>] PRINTABLES '>' / Curie)
SEMPREFIX <- [.=]

# RDF CURIE
# @see W3C CURIE Syntax 1.0
Curie <- (CuriePrefix? ':')? CurieReference
CuriePrefix <- NCName
CurieReference <- (IriPathAbsolute / IriPathRootless / IriPathEmpty)
                  ('?' IriQuery)? ('#' IriFragment)?
# RFC3987 - Internationalized Resource Identifiers (IRIs)
# @see RFC 3987, section 2.2 (ABNF for IRI references and IRIs)
# The group after '/' is optional; a second repetition could never
# match because the inner loop already consumes every segment.
IriPathAbsolute <- '/' (IriSegment ('/' IriSegment?)*)?
IriPathRootless <- IriSegment ('/' IriSegment?)*
# Matches the empty string when no path character follows.
IriPathEmpty <- !IriPathChar
IriSegment <- IriPathChar+
IriQuery <- (IriPathChar / IriPrivate / '/' / '?')*
IriFragment <- (IriPathChar / '/' / '?')*
IriPathChar <- IriUnreserved / PctEncoded / SubDelims / ':' / '@'
IriUnreserved <- [A-Za-z] / [0-9] / '-' / '.' / '_' / '~' / UcsChar
# Hex digits are accepted in either case: the RFC treats 'a'-'f' as
# equivalent to 'A'-'F' in percent-encoded octets.
PctEncoded <- '%' [0-9A-Fa-f] [0-9A-Fa-f]
SubDelims <- '!' / '$' / '&' / "'" / '(' / ')' / '*' / '+' / ','
           / ';' / '='
UcsChar <- [#xA0-#xD7FF] / [#xF900-#xFDCF] / [#xFDF0-#xFFEF]
         / [#x10000-#x1FFFD] / [#x20000-#x2FFFD] / [#x30000-#x3FFFD]
         / [#x40000-#x4FFFD] / [#x50000-#x5FFFD] / [#x60000-#x6FFFD]
         / [#x70000-#x7FFFD] / [#x80000-#x8FFFD] / [#x90000-#x9FFFD]
         / [#xA0000-#xAFFFD] / [#xB0000-#xBFFFD] / [#xC0000-#xCFFFD]
         / [#xD0000-#xDFFFD] / [#xE1000-#xEFFFD]
IriPrivate <- [#xE000-#xF8FF] / [#xF0000-#xFFFFD] / [#x100000-#x10FFFD]

# XML NCName
# @see W3C Namespaces in XML 1.0, production NCName
# An NCName is an XML Name containing no colon at all. Excluding ':'
# from every character (not only the first) keeps a prefix from
# swallowing the ':' separator that follows it in a CURIE.
NCName <- NCNameStartChar NCNameChar*
NCNameStartChar <- !':' XMLNAMESTARTCHAR
NCNameChar <- !':' XMLNameChar

# XML Name
# @see W3C XML 1.0, productions NameStartChar, NameChar and Name
# References use the rule names exactly as defined below.
XMLName <- XMLNAMESTARTCHAR XMLNameChar*
# The single code point #xB7 is bracketed like the ranges around it so
# that the '#' is not read as the start of a comment.
XMLNameChar <- XMLNAMESTARTCHAR / "-" / "." / [0-9] / [#xB7]
             / [#x0300-#x036F] / [#x203F-#x2040]
XMLNAMESTARTCHAR <- ":" / "_" / [A-Z] / [a-z] / [#xC0-#xD6]
                  / [#xD8-#xF6] / [#xF8-#x2FF] / [#x370-#x37D]
                  / [#x37F-#x1FFF] / [#x200C-#x200D] / [#x2070-#x218F]
                  / [#x2C00-#x2FEF] / [#x3001-#xD7FF] / [#xF900-#xFDCF]
                  / [#xFDF0-#xFFFD] / [#x10000-#xEFFFF]