diff options
Diffstat (limited to 'js/lib')
-rw-r--r-- | js/lib/blocks.js | 124 | ||||
-rw-r--r-- | js/lib/html.js | 50 | ||||
-rwxr-xr-x | js/lib/index.js | 14 | ||||
-rw-r--r-- | js/lib/inlines.js | 46 | ||||
-rw-r--r-- | js/lib/node.js | 24 |
5 files changed, 169 insertions, 89 deletions
diff --git a/js/lib/blocks.js b/js/lib/blocks.js index c6e5d75..bd00b1a 100644 --- a/js/lib/blocks.js +++ b/js/lib/blocks.js @@ -1,35 +1,66 @@ var Node = require('./node'); var C_GREATERTHAN = 62; +var C_NEWLINE = 10; var C_SPACE = 32; var C_OPEN_BRACKET = 91; var InlineParser = require('./inlines'); + var unescapeString = new InlineParser().unescapeString; +var BLOCKTAGNAME = '(?:article|header|aside|hgroup|iframe|blockquote|hr|body|li|map|button|object|canvas|ol|caption|output|col|p|colgroup|pre|dd|progress|div|section|dl|table|td|dt|tbody|embed|textarea|fieldset|tfoot|figcaption|th|figure|thead|footer|footer|tr|form|ul|h1|h2|h3|h4|h5|h6|video|script|style)'; + +var HTMLBLOCKOPEN = "<(?:" + BLOCKTAGNAME + "[\\s/>]" + "|" + + "/" + BLOCKTAGNAME + "[\\s>]" + "|" + "[?!])"; + +var reHtmlBlockOpen = new RegExp('^' + HTMLBLOCKOPEN, 'i'); + +var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/; + +var reMaybeSpecial = /^[ #`~*+_=<>0-9-]/; + +var reNonSpace = /[^ \t\n]/; + +var reBulletListMarker = /^[*+-]( +|$)/; + +var reOrderedListMarker = /^(\d+)([.)])( +|$)/; + +var reATXHeaderMarker = /^#{1,6}(?: +|$)/; + +var reCodeFence = /^`{3,}(?!.*`)|^~{3,}(?!.*~)/; + +var reClosingCodeFence = /^(?:`{3,}|~{3,})(?= *$)/; + +var reSetextHeaderLine = /^(?:=+|-+) *$/; + +var reLineEnding = /\r\n|\n|\r/; + // Returns true if string contains only space characters. var isBlank = function(s) { "use strict"; - return /^\s*$/.test(s); + return !(reNonSpace.test(s)); }; +var tabSpaces = [' ', ' ', ' ', ' ']; + // Convert tabs to spaces on each line using a 4-space tab stop. var detabLine = function(text) { "use strict"; - if (text.indexOf('\u0000') !== -1) { - // replace NUL for security - text = text.replace(/\0/g, '\uFFFD'); - } - if (text.indexOf('\t') === -1) { - return text; - } else { - var lastStop = 0; - return text.replace(/\t/g, function(match, offset) { - var result = ' '.slice((offset - lastStop) % 4); - lastStop = offset + 1; - return result; - }); + + var start = 0; + var offset; + var lastStop = 0; + + while ((offset = text.indexOf('\t', start)) !== -1) { + var numspaces = (offset - lastStop) % 4; + var spaces = tabSpaces[numspaces]; + text = text.slice(0, offset) + spaces + text.slice(offset + 1); + lastStop = offset + numspaces; + start = lastStop; } + + return text; }; // Attempt to match a regex in string s at offset offset. @@ -44,13 +75,15 @@ var matchAt = function(re, s, offset) { } }; -var BLOCKTAGNAME = '(?:article|header|aside|hgroup|iframe|blockquote|hr|body|li|map|button|object|canvas|ol|caption|output|col|p|colgroup|pre|dd|progress|div|section|dl|table|td|dt|tbody|embed|textarea|fieldset|tfoot|figcaption|th|figure|thead|footer|footer|tr|form|ul|h1|h2|h3|h4|h5|h6|video|script|style)'; -var HTMLBLOCKOPEN = "<(?:" + BLOCKTAGNAME + "[\\s/>]" + "|" + - "/" + BLOCKTAGNAME + "[\\s>]" + "|" + "[?!])"; -var reHtmlBlockOpen = new RegExp('^' + HTMLBLOCKOPEN, 'i'); - -var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/; - +// destructively trip final blank lines in an array of strings +var stripFinalBlankLines = function(lns) { + "use strict"; + var i = lns.length - 1; + while (!reNonSpace.test(lns[i])) { + lns.pop(); + i--; + } +}; // DOC PARSER @@ -160,12 +193,12 @@ var parseListMarker = function(ln, offset) { if (rest.match(reHrule)) { return null; } - if ((match = rest.match(/^[*+-]( +|$)/))) { + if ((match = rest.match(reBulletListMarker))) { spaces_after_marker = match[1].length; data.type = 'Bullet'; data.bullet_char = match[0][0]; - } else if ((match = rest.match(/^(\d+)([.)])( +|$)/))) { + } else if ((match = rest.match(reOrderedListMarker))) { spaces_after_marker = match[3].length; data.type = 'Ordered'; data.start = parseInt(match[1]); @@ -214,6 +247,11 @@ var incorporateLine = function(ln, line_number) { var container = this.doc; var oldtip = this.tip; + // replace NUL characters for security + if (ln.indexOf('\u0000') !== -1) { + ln = ln.replace(/\0/g, '\uFFFD'); + } + // Convert tabs to spaces: ln = detabLine(ln); @@ -226,7 +264,7 @@ var incorporateLine = function(ln, line_number) { } container = container.lastChild; - match = matchAt(/[^ ]/, ln, offset); + match = matchAt(reNonSpace, ln, offset); if (match === -1) { first_nonspace = ln.length; blank = true; @@ -319,13 +357,11 @@ var incorporateLine = function(ln, line_number) { // want to close unmatched blocks. So we store this closure for // use later, when we have more information. var closeUnmatchedBlocks = function(mythis) { - var already_done = false; // finalize any blocks not matched - while (!already_done && oldtip !== last_matched_container) { + while (oldtip !== last_matched_container) { mythis.finalize(oldtip, line_number - 1); oldtip = oldtip.parent; } - already_done = true; }; // Check to see if we've hit 2nd blank line; if so break out of list: @@ -339,9 +375,9 @@ var incorporateLine = function(ln, line_number) { container.t !== 'IndentedCode' && container.t !== 'HtmlBlock' && // this is a little performance optimization: - matchAt(/^[ #`~*+_=<>0-9-]/, ln, offset) !== -1) { + matchAt(reMaybeSpecial, ln, offset) !== -1) { - match = matchAt(/[^ ]/, ln, offset); + match = matchAt(reNonSpace, ln, offset); if (match === -1) { first_nonspace = ln.length; blank = true; @@ -371,7 +407,7 @@ var incorporateLine = function(ln, line_number) { closeUnmatchedBlocks(this); container = this.addChild('BlockQuote', line_number, offset); - } else if ((match = ln.slice(first_nonspace).match(/^#{1,6}(?: +|$)/))) { + } else if ((match = ln.slice(first_nonspace).match(reATXHeaderMarker))) { // ATX header offset = first_nonspace + match[0].length; closeUnmatchedBlocks(this); @@ -382,7 +418,7 @@ var incorporateLine = function(ln, line_number) { [ln.slice(offset).replace(/^ *#+ *$/, '').replace(/ +#+ *$/, '')]; break; - } else if ((match = ln.slice(first_nonspace).match(/^`{3,}(?!.*`)|^~{3,}(?!.*~)/))) { + } else if ((match = ln.slice(first_nonspace).match(reCodeFence))) { // fenced code block var fence_length = match[0].length; closeUnmatchedBlocks(this); @@ -402,7 +438,7 @@ var incorporateLine = function(ln, line_number) { } else if (container.t === 'Paragraph' && container.strings.length === 1 && - ((match = ln.slice(first_nonspace).match(/^(?:=+|-+) *$/)))) { + ((match = ln.slice(first_nonspace).match(reSetextHeaderLine)))) { // setext header line closeUnmatchedBlocks(this); container.t = 'Header'; // convert Paragraph to SetextHeader @@ -447,7 +483,7 @@ var incorporateLine = function(ln, line_number) { // What remains at the offset is a text line. Add the text to the // appropriate container. - match = matchAt(/[^ ]/, ln, offset); + match = matchAt(reNonSpace, ln, offset); if (match === -1) { first_nonspace = ln.length; blank = true; @@ -500,7 +536,7 @@ var incorporateLine = function(ln, line_number) { // check for closing code fence: match = (indent <= 3 && ln.charAt(first_nonspace) === container.fence_char && - ln.slice(first_nonspace).match(/^(?:`{3,}|~{3,})(?= *$)/)); + ln.slice(first_nonspace).match(reClosingCodeFence)); if (match && match[0].length >= container.fence_length) { // don't add closing fence to container; instead, close it: this.finalize(container, line_number); @@ -569,7 +605,8 @@ var finalize = function(block, line_number) { break; case 'IndentedCode': - block.literal = block.strings.join('\n').replace(/(\n *)*$/, '\n'); + stripFinalBlankLines(block.strings); + block.literal = block.strings.join('\n') + '\n'; block.t = 'CodeBlock'; break; @@ -644,21 +681,31 @@ var parse = function(input) { this.doc = Document(); this.tip = this.doc; this.refmap = {}; - var lines = input.replace(/\n$/, '').split(/\r\n|\n|\r/); + if (this.options.time) { console.time("preparing input"); } + var lines = input.split(reLineEnding); var len = lines.length; + if (input.charCodeAt(input.length - 1) === C_NEWLINE) { + // ignore last blank line created by final newline + len -= 1; + } + if (this.options.time) { console.timeEnd("preparing input"); } + if (this.options.time) { console.time("block parsing"); } for (var i = 0; i < len; i++) { this.incorporateLine(lines[i], i + 1); } while (this.tip) { this.finalize(this.tip, len); } + if (this.options.time) { console.timeEnd("block parsing"); } + if (this.options.time) { console.time("inline parsing"); } this.processInlines(this.doc); + if (this.options.time) { console.timeEnd("inline parsing"); } return this.doc; }; // The DocParser object. -function DocParser(){ +function DocParser(options){ "use strict"; return { doc: Document(), @@ -672,7 +719,8 @@ function DocParser(){ incorporateLine: incorporateLine, finalize: finalize, processInlines: processInlines, - parse: parse + parse: parse, + options: options || {} }; } diff --git a/js/lib/html.js b/js/lib/html.js index 26c677b..847ed98 100644 --- a/js/lib/html.js +++ b/js/lib/html.js @@ -19,31 +19,38 @@ var tag = function(name, attrs, selfclosing) { return result; }; -var renderNodes = function(block, options) { +var reHtmlTag = /\<[^>]*\>/; + +var renderNodes = function(block) { var attrs; var info_words; var tagname; var walker = block.walker(); var event, node, entering; - var buffer = []; + var buffer = ""; + var lastOut = "\n"; var disableTags = 0; var grandparent; var out = function(s) { if (disableTags > 0) { - buffer.push(s.replace(/\<[^>]*\>/g, '')); + buffer += s.replace(reHtmlTag, ''); } else { - buffer.push(s); + buffer += s; } + lastOut = s; }; var esc = this.escape; var cr = function() { - if (buffer.length > 0 && buffer[buffer.length - 1] !== '\n') { - out('\n'); + if (lastOut !== '\n') { + buffer += '\n'; + lastOut = '\n'; } }; - options = options || {}; + var options = this.options; + + if (options.time) { console.time("rendering"); } while ((event = walker.next())) { entering = event.entering; @@ -81,10 +88,6 @@ var renderNodes = function(block, options) { out(tag(entering ? 'strong' : '/strong')); break; - case 'Emph': - out(tag(entering ? 'strong' : '/strong')); - break; - case 'Html': out(node.literal); break; @@ -198,7 +201,7 @@ var renderNodes = function(block, options) { } cr(); out(tag('pre') + tag('code', attrs)); - out(this.escape(node.literal)); + out(esc(node.literal)); out(tag('/code') + tag('/pre')); cr(); break; @@ -220,14 +223,15 @@ var renderNodes = function(block, options) { break; default: - console.log("Unknown node type " + node.t); + throw("Unknown node type " + node.t); } } - return buffer.join(''); + if (options.time) { console.timeEnd("rendering"); } + return buffer; }; -var sub = function(s) { +var replaceUnsafeChar = function(s) { switch (s) { case '&': return '&'; @@ -242,23 +246,27 @@ var sub = function(s) { } }; +var reNeedsEscaping = /[&<>"]/; // The HtmlRenderer object. -function HtmlRenderer(){ +function HtmlRenderer(options){ return { // default options: - blocksep: '\n', // space between blocks - innersep: '\n', // space between block container tag and contents softbreak: '\n', // by default, soft breaks are rendered as newlines in HTML // set to "<br />" to make them hard breaks // set to " " if you want to ignore line wrapping in source escape: function(s, preserve_entities) { - if (preserve_entities) { - return s.replace(/[&](?:[#](x[a-f0-9]{1,8}|[0-9]{1,8});|[a-z][a-z0-9]{1,31};)|[&<>"]/gi, sub); + if (reNeedsEscaping.test(s)) { + if (preserve_entities) { + return s.replace(/[&](?:[#](x[a-f0-9]{1,8}|[0-9]{1,8});|[a-z][a-z0-9]{1,31};)|[&<>"]/gi, replaceUnsafeChar); + } else { + return s.replace(/[&<>"]/g, replaceUnsafeChar); + } } else { - return s.replace(/[&<>"]/g, sub); + return s; } }, + options: options || {}, render: renderNodes }; } diff --git a/js/lib/index.js b/js/lib/index.js index d0532c6..22a2184 100755 --- a/js/lib/index.js +++ b/js/lib/index.js @@ -13,11 +13,15 @@ var util = require('util'); -var renderAST = function(tree) { - return util.inspect(tree.toAST(), {depth: 20}) + '\n'; -}; - module.exports.Node = require('./node'); module.exports.DocParser = require('./blocks'); module.exports.HtmlRenderer = require('./html'); -module.exports.ASTRenderer = renderAST; +module.exports.ASTRenderer = function(options) { + return { + render: function(tree) { + return util.inspect(tree.toAST(), null, 20, + this.options.colors) + '\n'; + }, + options: options || {} + }; +} diff --git a/js/lib/inlines.js b/js/lib/inlines.js index 72c4448..4d49861 100644 --- a/js/lib/inlines.js +++ b/js/lib/inlines.js @@ -65,6 +65,8 @@ var reEntityHere = new RegExp('^' + ENTITY, 'i'); var reEntityOrEscapedChar = new RegExp('\\\\' + ESCAPABLE + '|' + ENTITY, 'gi'); +var reBackslashOrAmp = /[\\&]/; + var reTicks = new RegExp('`+'); var reTicksHere = new RegExp('^`+'); @@ -75,6 +77,18 @@ var reAutolink = /^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data var reSpnl = /^ *(?:\n *)?/; +var reWhitespaceChar = /^\s/; + +var reWhitespace = /\s+/g; + +var reFinalSpace = / *$/; + +var reInitialSpace = /^ */; + +var reAsciiAlnum = /[a-z0-9]/i; + +var reLinkLabel = /^\[(?:[^\\\[\]]|\\[\[\]]){0,1000}\]/; + // Matches a string of non-special characters. var reMain = /^[^\n`\[\]\\!<&*_]+/m; @@ -90,7 +104,11 @@ var unescapeChar = function(s) { // Replace entities and backslash escapes with literal characters. var unescapeString = function(s) { "use strict"; - return s.replace(reEntityOrEscapedChar, unescapeChar); + if (reBackslashOrAmp.test(s)) { + return s.replace(reEntityOrEscapedChar, unescapeChar); + } else { + return s; + } }; // Normalize reference label: collapse internal whitespace @@ -167,8 +185,7 @@ var parseBackticks = function(block) { node = new Node('Code'); node.literal = this.subject.slice(afterOpenTicks, this.pos - ticks.length) - .replace(/[ \n]+/g, ' ') - .trim(); + .trim().replace(reWhitespace, ' '); block.appendChild(node); return true; } @@ -270,17 +287,17 @@ var scanDelims = function(cc) { char_after = fromCodePoint(cc_after); } - var can_open = numdelims > 0 && !(/\s/.test(char_after)) && + var can_open = numdelims > 0 && !(reWhitespaceChar.test(char_after)) && !(rePunctuation.test(char_after) && !(/\s/.test(char_before)) && !(rePunctuation.test(char_before))); - var can_close = numdelims > 0 && !(/\s/.test(char_before)) && + var can_close = numdelims > 0 && !(reWhitespaceChar.test(char_before)) && !(rePunctuation.test(char_before) && - !(/\s/.test(char_after)) && + !(reWhitespaceChar.test(char_after)) && !(rePunctuation.test(char_after))); if (cc === C_UNDERSCORE) { - can_open = can_open && !((/[a-z0-9]/i).test(char_before)); - can_close = can_close && !((/[a-z0-9]/i).test(char_after)); + can_open = can_open && !((reAsciiAlnum).test(char_before)); + can_close = can_close && !((reAsciiAlnum).test(char_after)); } this.pos = startpos; return { numdelims: numdelims, @@ -463,7 +480,7 @@ var parseLinkDestination = function() { // Attempt to parse a link label, returning number of characters parsed. var parseLinkLabel = function() { "use strict"; - var m = this.match(/^\[(?:[^\\\[\]]|\\[\[\]]){0,1000}\]/); + var m = this.match(reLinkLabel); return m === null ? 0 : m.length; }; @@ -581,10 +598,11 @@ var parseCloseBracket = function(block) { ((dest = this.parseLinkDestination()) !== null) && this.spnl() && // make sure there's a space before the title: - (/^\s/.test(this.subject.charAt(this.pos - 1)) && + (reWhitespaceChar.test(this.subject.charAt(this.pos - 1)) && (title = this.parseLinkTitle() || '') || true) && this.spnl() && - this.match(/^\)/)) { + this.subject.charAt(this.pos) === ')') { + this.pos += 1; matched = true; } } else { @@ -691,15 +709,15 @@ var parseNewline = function(block) { // check previous node for trailing spaces var lastc = block.lastChild; if (lastc && lastc.t === 'Text') { - var sps = / *$/.exec(lastc.literal)[0].length; + var sps = reFinalSpace.exec(lastc.literal)[0].length; if (sps > 0) { - lastc.literal = lastc.literal.replace(/ *$/, ''); + lastc.literal = lastc.literal.replace(reFinalSpace, ''); } block.appendChild(new Node(sps >= 2 ? 'Hardbreak' : 'Softbreak')); } else { block.appendChild(new Node('Softbreak')); } - this.match(/^ */); // gobble leading spaces in next line + this.match(reInitialSpace); // gobble leading spaces in next line return true; }; diff --git a/js/lib/node.js b/js/lib/node.js index 84fb122..9dc7c3f 100644 --- a/js/lib/node.js +++ b/js/lib/node.js @@ -14,18 +14,12 @@ function isContainer(node) { t === 'Image'); } -function NodeWalker(root) { - this.current = root; - this.root = root; - this.entering = true; -} - -NodeWalker.prototype.resumeAt = function(node, entering) { +var resumeAt = function(node, entering) { this.current = node; this.entering = (entering === true); }; -NodeWalker.prototype.next = function(){ +var next = function(){ var cur = this.current; var entering = this.entering; @@ -56,7 +50,15 @@ NodeWalker.prototype.next = function(){ return {entering: entering, node: cur}; }; -function Node(nodeType, sourcepos) { +var NodeWalker = function(root) { + return { current: root, + root: root, + entering: true, + next: next, + resumeAt: resumeAt }; +}; + +var Node = function(nodeType, sourcepos) { this.t = nodeType; this.parent = null; this.firstChild = null; @@ -77,7 +79,7 @@ function Node(nodeType, sourcepos) { this.fence_length = undefined; this.fence_offset = undefined; this.level = undefined; -} +}; Node.prototype.isContainer = function() { return isContainer(this); @@ -154,7 +156,7 @@ Node.prototype.insertBefore = function(sibling) { }; Node.prototype.walker = function() { - var walker = new NodeWalker(this); + var walker = NodeWalker(this); return walker; }; |