From c3f9a7caecbf5239f29b6fd1a7edf47df4fc6d21 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 26 Oct 2014 11:57:43 -0700 Subject: Add EMPHASIS_STACK_LIMIT. If we get more than 1000 deep in potential emphasis/strong emphasis openers, we refuse to add more to the stack. This prevents the sort of stack overflow we previously got with python -c 'print "*a **a " * 100000; print " a** a*" * 100000' | ./cmark Partially addresses #166. --- src/cmark.h | 1 + src/inlines.c | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'src') diff --git a/src/cmark.h b/src/cmark.h index ff2f9a2..5e508e4 100644 --- a/src/cmark.h +++ b/src/cmark.h @@ -9,6 +9,7 @@ #define VERSION "0.1" #define CODE_INDENT 4 +#define EMPHASIS_STACK_LIMIT 1000 struct node_inl { enum { diff --git a/src/inlines.c b/src/inlines.c index 7a7f08a..928a3ac 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -23,6 +23,7 @@ typedef struct Subject { int label_nestlevel; reference_map *refmap; inline_stack *emphasis_openers; + int number_of_emphasis_openers; } subject; static node_inl *parse_chunk_inlines(chunk *chunk, reference_map *refmap); @@ -177,6 +178,7 @@ static void subject_from_buf(subject *e, strbuf *buffer, reference_map *refmap) e->label_nestlevel = 0; e->refmap = refmap; e->emphasis_openers = NULL; + e->number_of_emphasis_openers = 0; chunk_rtrim(&e->input); } @@ -190,6 +192,7 @@ static void subject_from_chunk(subject *e, chunk *chunk, reference_map *refmap) e->label_nestlevel = 0; e->refmap = refmap; e->emphasis_openers = NULL; + e->number_of_emphasis_openers = 0; chunk_rtrim(&e->input); } @@ -309,6 +312,7 @@ static void free_openers(subject* subj, inline_stack* istack) while (subj->emphasis_openers != istack) { tempstack = subj->emphasis_openers; subj->emphasis_openers = subj->emphasis_openers->previous; + subj->number_of_emphasis_openers--; free(tempstack); } } @@ -389,7 +393,7 @@ static node_inl* handle_strong_emph(subject* subj, unsigned char c, node_inl **l cannotClose: 
inl_text = make_str(chunk_dup(&subj->input, subj->pos - numdelims, numdelims)); - if (can_open) + if (can_open && subj->number_of_emphasis_openers < EMPHASIS_STACK_LIMIT) { istack = (inline_stack*)malloc(sizeof(inline_stack)); if (istack == NULL) { @@ -400,6 +404,7 @@ cannotClose: istack->first_inline = inl_text; istack->previous = subj->emphasis_openers; subj->emphasis_openers = istack; + subj->number_of_emphasis_openers++; } return inl_text; -- cgit v1.2.3 From e618715636a3bd60930bea34d214b3aaf8e9e766 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 26 Oct 2014 12:37:59 -0700 Subject: Require space before closing # sequence in ATX header. Closes #169. --- js/lib/blocks.js | 2 +- spec.txt | 24 ++++++++++++++++-------- src/blocks.c | 12 ++++++------ 3 files changed, 23 insertions(+), 15 deletions(-) (limited to 'src') diff --git a/js/lib/blocks.js b/js/lib/blocks.js index 109661f..175cc2a 100644 --- a/js/lib/blocks.js +++ b/js/lib/blocks.js @@ -366,7 +366,7 @@ var incorporateLine = function(ln, line_number) { container.level = match[0].trim().length; // number of #s // remove trailing ###s: container.strings = - [ln.slice(offset).replace(/(?:(\\#) *#*| *#+) *$/,'$1')]; + [ln.slice(offset).replace(/^ *#+ *$/, '').replace(/ +#+ *$/,'')]; break; } else if ((match = ln.slice(first_nonspace).match(/^`{3,}(?!.*`)|^~{3,}(?!.*~)/))) { diff --git a/spec.txt b/spec.txt index e5953bd..5070b3b 100644 --- a/spec.txt +++ b/spec.txt @@ -479,11 +479,11 @@ consists of a string of characters, parsed as inline content, between an opening sequence of 1--6 unescaped `#` characters and an optional closing sequence of any number of `#` characters. The opening sequence of `#` characters cannot be followed directly by a nonspace character. -The closing `#` characters may be followed by spaces only. The opening -`#` character may be indented 0-3 spaces. The raw contents of the -header are stripped of leading and trailing spaces before being parsed -as inline content. 
The header level is equal to the number of `#` -characters in the opening sequence. +The optional closing sequence of `#`s must be preceded by a space and may be +followed by spaces only. The opening `#` character may be indented 0-3 +spaces. The raw contents of the header are stripped of leading and +trailing spaces before being parsed as inline content. The header level +is equal to the number of `#` characters in the opening sequence. Simple headers: @@ -614,16 +614,24 @@ header:

foo ### b

. +The closing sequence must be preceded by a space: + +. +# foo# +. +

foo#

+. + Backslash-escaped `#` characters do not count as part of the closing sequence: . ### foo \### -## foo \#\## +## foo #\## # foo \# . -

foo #

-

foo ##

+

foo ###

+

foo ###

foo #

. diff --git a/src/blocks.c b/src/blocks.c index ae106d2..7613c82 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -432,15 +432,15 @@ static void chop_trailing_hashtags(chunk *ch) chunk_rtrim(ch); orig_n = n = ch->len - 1; - // if string ends in #s, remove these: + // if string ends in space followed by #s, remove these: while (n >= 0 && peek_at(ch, n) == '#') n--; - // the last # was escaped, so we include it. - if (n != orig_n && n >= 0 && peek_at(ch, n) == '\\') - n++; - - ch->len = n + 1; + // Check for a space before the final #s: + if (n != orig_n && n >= 0 && peek_at(ch, n) == ' ') { + ch->len = n; + chunk_rtrim(ch); + } } // Process one line at a time, modifying a node_block. -- cgit v1.2.3 From adc78c85809fb2f79f967d29713e2d9f520aea5b Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 28 Oct 2014 22:21:45 -0700 Subject: Rename number_of_emphasis_openers -> emphasis_nestlevel. --- src/inlines.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src') diff --git a/src/inlines.c b/src/inlines.c index 928a3ac..d282170 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -23,7 +23,7 @@ typedef struct Subject { int label_nestlevel; reference_map *refmap; inline_stack *emphasis_openers; - int number_of_emphasis_openers; + int emphasis_nestlevel; } subject; static node_inl *parse_chunk_inlines(chunk *chunk, reference_map *refmap); @@ -178,7 +178,7 @@ static void subject_from_buf(subject *e, strbuf *buffer, reference_map *refmap) e->label_nestlevel = 0; e->refmap = refmap; e->emphasis_openers = NULL; - e->number_of_emphasis_openers = 0; + e->emphasis_nestlevel = 0; chunk_rtrim(&e->input); } @@ -192,7 +192,7 @@ static void subject_from_chunk(subject *e, chunk *chunk, reference_map *refmap) e->label_nestlevel = 0; e->refmap = refmap; e->emphasis_openers = NULL; - e->number_of_emphasis_openers = 0; + e->emphasis_nestlevel = 0; chunk_rtrim(&e->input); } @@ -312,7 +312,7 @@ static void free_openers(subject* subj, inline_stack* istack) 
while (subj->emphasis_openers != istack) { tempstack = subj->emphasis_openers; subj->emphasis_openers = subj->emphasis_openers->previous; - subj->number_of_emphasis_openers--; + subj->emphasis_nestlevel--; free(tempstack); } } @@ -393,7 +393,7 @@ static node_inl* handle_strong_emph(subject* subj, unsigned char c, node_inl **l cannotClose: inl_text = make_str(chunk_dup(&subj->input, subj->pos - numdelims, numdelims)); - if (can_open && subj->number_of_emphasis_openers < EMPHASIS_STACK_LIMIT) + if (can_open && subj->emphasis_nestlevel < EMPHASIS_STACK_LIMIT) { istack = (inline_stack*)malloc(sizeof(inline_stack)); if (istack == NULL) { @@ -404,7 +404,7 @@ cannotClose: istack->first_inline = inl_text; istack->previous = subj->emphasis_openers; subj->emphasis_openers = istack; - subj->number_of_emphasis_openers++; + subj->emphasis_nestlevel++; } return inl_text; -- cgit v1.2.3 From 9253c0eab573e5ca0d37e4b1db77d3b0bfcf3be6 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 28 Oct 2014 22:57:01 -0700 Subject: Changed EMPHASIS_STACK_LIMIT -> STACK_LIMIT. We'll also use it in parsing bracketed link labels. 
--- src/cmark.h | 2 +- src/inlines.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/cmark.h b/src/cmark.h index 5e508e4..e34df72 100644 --- a/src/cmark.h +++ b/src/cmark.h @@ -9,7 +9,7 @@ #define VERSION "0.1" #define CODE_INDENT 4 -#define EMPHASIS_STACK_LIMIT 1000 +#define STACK_LIMIT 1000 struct node_inl { enum { diff --git a/src/inlines.c b/src/inlines.c index d282170..26eb02a 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -393,7 +393,7 @@ static node_inl* handle_strong_emph(subject* subj, unsigned char c, node_inl **l cannotClose: inl_text = make_str(chunk_dup(&subj->input, subj->pos - numdelims, numdelims)); - if (can_open && subj->emphasis_nestlevel < EMPHASIS_STACK_LIMIT) + if (can_open && subj->emphasis_nestlevel < STACK_LIMIT) { istack = (inline_stack*)malloc(sizeof(inline_stack)); if (istack == NULL) { -- cgit v1.2.3 From 67619a5d5c71c44565a9a0413aaf78f9baece528 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 28 Oct 2014 23:03:02 -0700 Subject: Disallow bracketed labels nested more than 1000 deep. For performance reasons. See #166. --- src/inlines.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src') diff --git a/src/inlines.c b/src/inlines.c index 26eb02a..9216979 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -594,7 +594,8 @@ static int link_label(subject* subj, chunk *raw_label) advance(subj); // advance past [ unsigned char c; - while ((c = peek_char(subj)) && (c != ']' || nestlevel > 0)) { + while ((c = peek_char(subj)) && + (c != ']' || (nestlevel > 0 && nestlevel < STACK_LIMIT))) { switch (c) { case '`': tmp = handle_backticks(subj); @@ -622,7 +623,7 @@ static int link_label(subject* subj, chunk *raw_label) advance(subj); } } - if (c == ']') { + if (nestlevel == 0 && c == ']') { *raw_label = chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1)); subj->label_nestlevel = 0; advance(subj); // advance past ] -- cgit v1.2.3