diff options
Diffstat (limited to 'js/lib/blocks.js')
-rw-r--r-- | js/lib/blocks.js | 124 |
1 files changed, 86 insertions, 38 deletions
diff --git a/js/lib/blocks.js b/js/lib/blocks.js index c6e5d75..bd00b1a 100644 --- a/js/lib/blocks.js +++ b/js/lib/blocks.js @@ -1,35 +1,66 @@ var Node = require('./node'); var C_GREATERTHAN = 62; +var C_NEWLINE = 10; var C_SPACE = 32; var C_OPEN_BRACKET = 91; var InlineParser = require('./inlines'); + var unescapeString = new InlineParser().unescapeString; +var BLOCKTAGNAME = '(?:article|header|aside|hgroup|iframe|blockquote|hr|body|li|map|button|object|canvas|ol|caption|output|col|p|colgroup|pre|dd|progress|div|section|dl|table|td|dt|tbody|embed|textarea|fieldset|tfoot|figcaption|th|figure|thead|footer|footer|tr|form|ul|h1|h2|h3|h4|h5|h6|video|script|style)'; + +var HTMLBLOCKOPEN = "<(?:" + BLOCKTAGNAME + "[\\s/>]" + "|" + + "/" + BLOCKTAGNAME + "[\\s>]" + "|" + "[?!])"; + +var reHtmlBlockOpen = new RegExp('^' + HTMLBLOCKOPEN, 'i'); + +var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/; + +var reMaybeSpecial = /^[ #`~*+_=<>0-9-]/; + +var reNonSpace = /[^ \t\n]/; + +var reBulletListMarker = /^[*+-]( +|$)/; + +var reOrderedListMarker = /^(\d+)([.)])( +|$)/; + +var reATXHeaderMarker = /^#{1,6}(?: +|$)/; + +var reCodeFence = /^`{3,}(?!.*`)|^~{3,}(?!.*~)/; + +var reClosingCodeFence = /^(?:`{3,}|~{3,})(?= *$)/; + +var reSetextHeaderLine = /^(?:=+|-+) *$/; + +var reLineEnding = /\r\n|\n|\r/; + // Returns true if string contains only space characters. var isBlank = function(s) { "use strict"; - return /^\s*$/.test(s); + return !(reNonSpace.test(s)); }; +var tabSpaces = [' ', ' ', ' ', ' ']; + // Convert tabs to spaces on each line using a 4-space tab stop. var detabLine = function(text) { "use strict"; - if (text.indexOf('\u0000') !== -1) { - // replace NUL for security - text = text.replace(/\0/g, '\uFFFD'); - } - if (text.indexOf('\t') === -1) { - return text; - } else { - var lastStop = 0; - return text.replace(/\t/g, function(match, offset) { - var result = ' '.slice((offset - lastStop) % 4); - lastStop = offset + 1; - return result; - }); + + var start = 0; + var offset; + var lastStop = 0; + + while ((offset = text.indexOf('\t', start)) !== -1) { + var numspaces = (offset - lastStop) % 4; + var spaces = tabSpaces[numspaces]; + text = text.slice(0, offset) + spaces + text.slice(offset + 1); + lastStop = offset + numspaces; + start = lastStop; } + + return text; }; // Attempt to match a regex in string s at offset offset. @@ -44,13 +75,15 @@ var matchAt = function(re, s, offset) { } }; -var BLOCKTAGNAME = '(?:article|header|aside|hgroup|iframe|blockquote|hr|body|li|map|button|object|canvas|ol|caption|output|col|p|colgroup|pre|dd|progress|div|section|dl|table|td|dt|tbody|embed|textarea|fieldset|tfoot|figcaption|th|figure|thead|footer|footer|tr|form|ul|h1|h2|h3|h4|h5|h6|video|script|style)'; -var HTMLBLOCKOPEN = "<(?:" + BLOCKTAGNAME + "[\\s/>]" + "|" + - "/" + BLOCKTAGNAME + "[\\s>]" + "|" + "[?!])"; -var reHtmlBlockOpen = new RegExp('^' + HTMLBLOCKOPEN, 'i'); - -var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/; - +// destructively trip final blank lines in an array of strings +var stripFinalBlankLines = function(lns) { + "use strict"; + var i = lns.length - 1; + while (!reNonSpace.test(lns[i])) { + lns.pop(); + i--; + } +}; // DOC PARSER @@ -160,12 +193,12 @@ var parseListMarker = function(ln, offset) { if (rest.match(reHrule)) { return null; } - if ((match = rest.match(/^[*+-]( +|$)/))) { + if ((match = rest.match(reBulletListMarker))) { spaces_after_marker = match[1].length; data.type = 'Bullet'; data.bullet_char = match[0][0]; - } else if ((match = rest.match(/^(\d+)([.)])( +|$)/))) { + } else if ((match = rest.match(reOrderedListMarker))) { spaces_after_marker = match[3].length; data.type = 'Ordered'; data.start = parseInt(match[1]); @@ -214,6 +247,11 @@ var incorporateLine = function(ln, line_number) { var container = this.doc; var oldtip = this.tip; + // replace NUL characters for security + if (ln.indexOf('\u0000') !== -1) { + ln = ln.replace(/\0/g, '\uFFFD'); + } + // Convert tabs to spaces: ln = detabLine(ln); @@ -226,7 +264,7 @@ var incorporateLine = function(ln, line_number) { } container = container.lastChild; - match = matchAt(/[^ ]/, ln, offset); + match = matchAt(reNonSpace, ln, offset); if (match === -1) { first_nonspace = ln.length; blank = true; @@ -319,13 +357,11 @@ var incorporateLine = function(ln, line_number) { // want to close unmatched blocks. So we store this closure for // use later, when we have more information. var closeUnmatchedBlocks = function(mythis) { - var already_done = false; // finalize any blocks not matched - while (!already_done && oldtip !== last_matched_container) { + while (oldtip !== last_matched_container) { mythis.finalize(oldtip, line_number - 1); oldtip = oldtip.parent; } - already_done = true; }; // Check to see if we've hit 2nd blank line; if so break out of list: @@ -339,9 +375,9 @@ var incorporateLine = function(ln, line_number) { container.t !== 'IndentedCode' && container.t !== 'HtmlBlock' && // this is a little performance optimization: - matchAt(/^[ #`~*+_=<>0-9-]/, ln, offset) !== -1) { + matchAt(reMaybeSpecial, ln, offset) !== -1) { - match = matchAt(/[^ ]/, ln, offset); + match = matchAt(reNonSpace, ln, offset); if (match === -1) { first_nonspace = ln.length; blank = true; @@ -371,7 +407,7 @@ var incorporateLine = function(ln, line_number) { closeUnmatchedBlocks(this); container = this.addChild('BlockQuote', line_number, offset); - } else if ((match = ln.slice(first_nonspace).match(/^#{1,6}(?: +|$)/))) { + } else if ((match = ln.slice(first_nonspace).match(reATXHeaderMarker))) { // ATX header offset = first_nonspace + match[0].length; closeUnmatchedBlocks(this); @@ -382,7 +418,7 @@ var incorporateLine = function(ln, line_number) { [ln.slice(offset).replace(/^ *#+ *$/, '').replace(/ +#+ *$/, '')]; break; - } else if ((match = ln.slice(first_nonspace).match(/^`{3,}(?!.*`)|^~{3,}(?!.*~)/))) { + } else if ((match = ln.slice(first_nonspace).match(reCodeFence))) { // fenced code block var fence_length = match[0].length; closeUnmatchedBlocks(this); @@ -402,7 +438,7 @@ var incorporateLine = function(ln, line_number) { } else if (container.t === 'Paragraph' && container.strings.length === 1 && - ((match = ln.slice(first_nonspace).match(/^(?:=+|-+) *$/)))) { + ((match = ln.slice(first_nonspace).match(reSetextHeaderLine)))) { // setext header line closeUnmatchedBlocks(this); container.t = 'Header'; // convert Paragraph to SetextHeader @@ -447,7 +483,7 @@ var incorporateLine = function(ln, line_number) { // What remains at the offset is a text line. Add the text to the // appropriate container. - match = matchAt(/[^ ]/, ln, offset); + match = matchAt(reNonSpace, ln, offset); if (match === -1) { first_nonspace = ln.length; blank = true; @@ -500,7 +536,7 @@ var incorporateLine = function(ln, line_number) { // check for closing code fence: match = (indent <= 3 && ln.charAt(first_nonspace) === container.fence_char && - ln.slice(first_nonspace).match(/^(?:`{3,}|~{3,})(?= *$)/)); + ln.slice(first_nonspace).match(reClosingCodeFence)); if (match && match[0].length >= container.fence_length) { // don't add closing fence to container; instead, close it: this.finalize(container, line_number); @@ -569,7 +605,8 @@ var finalize = function(block, line_number) { break; case 'IndentedCode': - block.literal = block.strings.join('\n').replace(/(\n *)*$/, '\n'); + stripFinalBlankLines(block.strings); + block.literal = block.strings.join('\n') + '\n'; block.t = 'CodeBlock'; break; @@ -644,21 +681,31 @@ var parse = function(input) { this.doc = Document(); this.tip = this.doc; this.refmap = {}; - var lines = input.replace(/\n$/, '').split(/\r\n|\n|\r/); + if (this.options.time) { console.time("preparing input"); } + var lines = input.split(reLineEnding); var len = lines.length; + if (input.charCodeAt(input.length - 1) === C_NEWLINE) { + // ignore last blank line created by final newline + len -= 1; + } + if (this.options.time) { console.timeEnd("preparing input"); } + if (this.options.time) { console.time("block parsing"); } for (var i = 0; i < len; i++) { this.incorporateLine(lines[i], i + 1); } while (this.tip) { this.finalize(this.tip, len); } + if (this.options.time) { console.timeEnd("block parsing"); } + if (this.options.time) { console.time("inline parsing"); } this.processInlines(this.doc); + if (this.options.time) { console.timeEnd("inline parsing"); } return this.doc; }; // The DocParser object. -function DocParser(){ +function DocParser(options){ "use strict"; return { doc: Document(), @@ -672,7 +719,8 @@ function DocParser(){ incorporateLine: incorporateLine, finalize: finalize, processInlines: processInlines, - parse: parse + parse: parse, + options: options || {} }; } |