From 14b997d9350b3ee3f6d67fb12b470bf406d4a31b Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sun, 18 Jan 2015 10:41:54 -0800 Subject: Changed rule for `_` emphasis and strong emphasis. To prevent intra-word emphasis, we used to check to see if the delimiter was followed/preceded by an ASCII alphanumeric. We now do something more elegant: whereas an opening `*` must be left-flanking, an opening `_` must be left-flanking *and not right-flanking*. And so on for the other cases. All the original tests passed except some tests with Russian text with internal `_`, which formerly created emphasis but no longer do with the new rule. These tests have been adjusted. A few new test cases have been added to illustrate the rule. The C and JS implementations have both been updated. --- js/lib/inlines.js | 16 ++++++++++------ spec.txt | 40 ++++++++++++++++++++++++++++------------ src/inlines.c | 14 ++++++++------ 3 files changed, 46 insertions(+), 24 deletions(-) diff --git a/js/lib/inlines.js b/js/lib/inlines.js index b9bf805..79d2c90 100644 --- a/js/lib/inlines.js +++ b/js/lib/inlines.js @@ -87,8 +87,6 @@ var reFinalSpace = / *$/; var reInitialSpace = /^ */; -var reAsciiAlnum = /[a-z0-9]/i; - var reLinkLabel = /^\[(?:[^\\\[\]]|\\[\[\]]){0,1000}\]/; // Matches a string of non-special characters. @@ -238,6 +236,7 @@ var scanDelims = function(cc) { var numdelims = 0; var char_before, char_after, cc_after; var startpos = this.pos; + var left_flanking, right_flanking, can_open, can_close; char_before = this.pos === 0 ? 
'\n' : this.subject.charAt(this.pos - 1); @@ -254,17 +253,22 @@ var scanDelims = function(cc) { char_after = fromCodePoint(cc_after); } - var can_open = numdelims > 0 && !(reWhitespaceChar.test(char_after)) && + left_flanking = numdelims > 0 && + !(reWhitespaceChar.test(char_after)) && !(rePunctuation.test(char_after) && !(/\s/.test(char_before)) && !(rePunctuation.test(char_before))); - var can_close = numdelims > 0 && !(reWhitespaceChar.test(char_before)) && + right_flanking = numdelims > 0 && + !(reWhitespaceChar.test(char_before)) && !(rePunctuation.test(char_before) && !(reWhitespaceChar.test(char_after)) && !(rePunctuation.test(char_after))); if (cc === C_UNDERSCORE) { - can_open = can_open && !((reAsciiAlnum).test(char_before)); - can_close = can_close && !((reAsciiAlnum).test(char_after)); + can_open = left_flanking && !right_flanking; + can_close = right_flanking && !left_flanking; + } else { + can_open = left_flanking; + can_close = right_flanking; } this.pos = startpos; return { numdelims: numdelims, diff --git a/spec.txt b/spec.txt index 6df9078..2366163 100644 --- a/spec.txt +++ b/spec.txt @@ -4547,28 +4547,28 @@ The following rules define emphasis and strong emphasis: 2. A single `_` character [can open emphasis] iff it is part of a [left-flanking delimiter run] - and is not preceded by an ASCII alphanumeric character. + and not part of a [right-flanking delimiter run]. 3. A single `*` character [can close emphasis](@can-close-emphasis) iff it is part of a [right-flanking delimiter run]. 4. A single `_` character [can close emphasis] - iff it is part of a [right-flanking delimiter run]. - and it is not followed by an ASCII alphanumeric character. + iff it is part of a [right-flanking delimiter run] + and not part of a [left-flanking delimiter run]. 5. A double `**` [can open strong emphasis](@can-open-strong-emphasis) iff it is part of a [left-flanking delimiter run]. 6. 
A double `__` [can open strong emphasis] iff it is part of a [left-flanking delimiter run] - and is not preceded by an ASCII alphanumeric character. + and not part of a [right-flanking delimiter run]. 7. A double `**` [can close strong emphasis](@can-close-strong-emphasis) iff it is part of a [right-flanking delimiter run]. 8. A double `__` [can close strong emphasis] iff it is part of a [right-flanking delimiter run] - and is not followed by an ASCII alphanumeric character. + and not part of a [left-flanking delimiter run]. 9. Emphasis begins with a delimiter that [can open emphasis] and ends with a delimiter that [can close emphasis], and that uses the same @@ -4701,7 +4701,7 @@ a_"foo"_

<p>a_&quot;foo&quot;_</p>

. -Emphasis with `_` is not allowed inside ASCII words: +Emphasis with `_` is not allowed inside words: . foo_bar_ @@ -4715,12 +4715,28 @@ foo_bar_

<p>5_6_78</p>

. -But it is permitted inside non-ASCII words: - . пристаням_стремятся_ . -

<p>пристаням<em>стремятся</em></p>

+

<p>пристаням_стремятся_</p>

+. + +Here `_` does not generate emphasis, because the first delimiter run +is right-flanking and the second left-flanking: + +. +aa_"bb"_cc +. +

<p>aa_&quot;bb&quot;_cc</p>

+. + +Here there is no emphasis, because the delimiter runs are +both left- and right-flanking: + +. +"aa"_"bb"_"cc" +. +
<p>&quot;aa&quot;_&quot;bb&quot;_&quot;cc&quot;</p>
. Rule 3: @@ -4810,7 +4826,7 @@ _foo_bar . _пристаням_стремятся . -

<p><em>пристаням</em>стремятся</p>

+

<p>_пристаням_стремятся</p>

. . @@ -4897,7 +4913,7 @@ foo__bar__ . пристаням__стремятся__ . -

<p>пристаням<strong>стремятся</strong></p>

+

<p>пристаням__стремятся__</p>

. . @@ -5000,7 +5016,7 @@ __foo__bar . __пристаням__стремятся . -

<p><strong>пристаням</strong>стремятся</p>

+

<p>__пристаням__стремятся</p>

. . diff --git a/src/inlines.c b/src/inlines.c index 2487f63..2c12408 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -250,6 +250,7 @@ scan_delims(subject* subj, unsigned char c, bool * can_open, bool * can_close) int32_t after_char = 0; int32_t before_char = 0; int len; + bool left_flanking, right_flanking; if (subj->pos == 0) { before_char = 10; @@ -277,19 +278,20 @@ scan_delims(subject* subj, unsigned char c, bool * can_open, bool * can_close) if (len == -1) { after_char = 10; } - *can_open = numdelims > 0 && !utf8proc_is_space(after_char) && + left_flanking = numdelims > 0 && !utf8proc_is_space(after_char) && !(utf8proc_is_punctuation(after_char) && !utf8proc_is_space(before_char) && !utf8proc_is_punctuation(before_char)); - *can_close = numdelims > 0 && !utf8proc_is_space(before_char) && + right_flanking = numdelims > 0 && !utf8proc_is_space(before_char) && !(utf8proc_is_punctuation(before_char) && !utf8proc_is_space(after_char) && !utf8proc_is_punctuation(after_char)); if (c == '_') { - *can_open = *can_open && !(before_char < 128 && - cmark_isalnum((char)before_char)); - *can_close = *can_close && !(before_char < 128 && - cmark_isalnum((char)after_char)); + *can_open = left_flanking && !right_flanking; + *can_close = right_flanking && !left_flanking; + } else { + *can_open = left_flanking; + *can_close = right_flanking; } return numdelims; } -- cgit v1.2.3