diff options
-rw-r--r-- | js/bench.js | 2 | ||||
-rwxr-xr-x | js/bin/commonmark | 2 | ||||
-rw-r--r-- | js/lib/blocks.js | 127 | ||||
-rw-r--r-- | js/lib/html-renderer.js | 311 | ||||
-rwxr-xr-x | js/lib/index.js | 1 | ||||
-rw-r--r-- | js/lib/inlines.js | 257 | ||||
-rw-r--r-- | js/lib/node.js | 195 | ||||
-rwxr-xr-x | js/test.js | 2 |
8 files changed, 555 insertions, 342 deletions
diff --git a/js/bench.js b/js/bench.js index 77f5025..a5e0d91 100644 --- a/js/bench.js +++ b/js/bench.js @@ -17,7 +17,7 @@ suite.add('commonmark.js markdown->html', function() { "use strict"; var doc = new sm.DocParser().parse(contents); var renderer = new sm.HtmlRenderer(); - renderer.renderBlock(doc); + renderer.render(doc); }) .add('showdown.js markdown->html', function() { diff --git a/js/bin/commonmark b/js/bin/commonmark index 3c222f8..ac53e6e 100755 --- a/js/bin/commonmark +++ b/js/bin/commonmark @@ -14,7 +14,7 @@ var files = []; if (process.argv[2] === '--ast') { files = process.argv.slice(3); renderer = { render: function(x) { - return util.inspect(x, null, Infinity) + '\n'; + return util.inspect(x.toAST(), null, Infinity, true) + '\n'; } }; } else { files = process.argv.slice(2); diff --git a/js/lib/blocks.js b/js/lib/blocks.js index 786c099..cc2263a 100644 --- a/js/lib/blocks.js +++ b/js/lib/blocks.js @@ -1,3 +1,5 @@ +Node = require('./node'); + var C_GREATERTHAN = 62; var C_SPACE = 32; var C_OPEN_BRACKET = 91; @@ -48,19 +50,14 @@ var reHrule = /^(?:(?:\* *){3,}|(?:_ *){3,}|(?:- *){3,}) *$/; // These are methods of a DocParser object, defined below. var makeBlock = function(tag, start_line, start_column) { - return { t: tag, - open: true, - last_line_blank: false, - start_line: start_line, - start_column: start_column, - end_line: start_line, - children: [], - parent: null, - // string_content is formed by concatenating strings, in finalize: - string_content: "", - strings: [], - children: [] - }; + var b = new Node(tag); + b.pos.start = [start_line, start_column]; + b.pos.end = []; // assigned in finalization step + b.open = true; + b.last_line_blank = false; + b.string_content = ""; + b.strings = []; + return b; }; // Returns true if parent block can contain child block. @@ -81,14 +78,17 @@ var acceptsLines = function(block_type) { // Returns true if block ends with a blank line, descending if needed // into lists and sublists. var endsWithBlankLine = function(block) { - if (block.last_line_blank) { + while (block) { + if (block.last_line_blank) { return true; + } + if (block.t === 'List' || block.t === 'ListItem') { + block = block.lastChild; + } else { + break; + } } - if ((block.t === 'List' || block.t === 'ListItem') && block.children.length > 0) { - return endsWithBlankLine(block.children[block.children.length - 1]); - } else { - return false; - } + return false; }; // Break out of all containing lists, resetting the tip of the @@ -135,8 +135,7 @@ var addChild = function(tag, line_number, offset) { var column_number = offset + 1; // offset 0 = column 1 var newBlock = makeBlock(tag, line_number, column_number); - this.tip.children.push(newBlock); - newBlock.parent = this.tip; + this.tip.appendChild(newBlock); this.tip = newBlock; return newBlock; }; @@ -190,7 +189,6 @@ var listsMatch = function(list_data, item_data) { var incorporateLine = function(ln, line_number) { var all_matched = true; - var last_child; var first_nonspace; var offset = 0; var match; @@ -210,12 +208,11 @@ var incorporateLine = function(ln, line_number) { // For each containing block, try to parse the associated line start. // Bail out on failure: container will point to the last matching block. // Set all_matched to false if not all containers match. - while (container.children.length > 0) { - last_child = container.children[container.children.length - 1]; - if (!last_child.open) { + while (container.lastChild) { + if (!container.lastChild.open) { break; } - container = last_child; + container = container.lastChild; match = matchAt(/[^ ]/, ln, offset); if (match === -1) { @@ -472,8 +469,8 @@ var incorporateLine = function(ln, line_number) { container.t === 'Header' || container.t === 'FencedCode' || (container.t === 'ListItem' && - container.children.length === 0 && - container.start_line === line_number)); + !container.firstChild && + container.pos.start[0] === line_number)); var cont = container; while (cont.parent) { @@ -537,10 +534,10 @@ var finalize = function(block, line_number) { return 0; } block.open = false; - if (line_number > block.start_line) { - block.end_line = line_number - 1; + if (line_number > block.pos.start[0]) { + block.pos.end = [line_number - 1]; // TODO end column } else { - block.end_line = line_number; + block.pos.end = [line_number]; } switch (block.t) { @@ -584,30 +581,24 @@ var finalize = function(block, line_number) { case 'List': block.tight = true; // tight by default - var numitems = block.children.length; - var i = 0; - while (i < numitems) { - var item = block.children[i]; + var item = block.firstChild; + while (item) { // check for non-final list item ending with blank line: - var last_item = i === numitems - 1; - if (endsWithBlankLine(item) && !last_item) { + if (endsWithBlankLine(item) && item.next) { block.tight = false; break; } // recurse into children of list item, to see if there are // spaces between any of them: - var numsubitems = item.children.length; - var j = 0; - while (j < numsubitems) { - var subitem = item.children[j]; - var last_subitem = j === numsubitems - 1; - if (endsWithBlankLine(subitem) && !(last_item && last_subitem)) { + var subitem = item.firstChild; + while (subitem) { + if (endsWithBlankLine(subitem) && (item.next || subitem.next)) { block.tight = false; break; } - j++; + subitem = subitem.next; } - i++; + item = item.next; } break; @@ -621,45 +612,14 @@ var finalize = function(block, line_number) { // Walk through a block & children recursively, parsing string content // into inline content where appropriate. Returns new object. var processInlines = function(block) { - var newblock = {}; - newblock.t = block.t; - newblock.start_line = block.start_line; - newblock.start_column = block.start_column; - newblock.end_line = block.end_line; - - switch(block.t) { - case 'Paragraph': - newblock.children = - this.inlineParser.parse(block.string_content.trim(), this.refmap); - break; - case 'Header': - newblock.children = - this.inlineParser.parse(block.string_content.trim(), this.refmap); - newblock.level = block.level; - break; - case 'List': - newblock.list_data = block.list_data; - newblock.tight = block.tight; - break; - case 'CodeBlock': - newblock.string_content = block.string_content; - newblock.info = block.info; - break; - case 'HtmlBlock': - newblock.string_content = block.string_content; - break; - default: - break; - } - - if (block.children && !newblock.children) { - var newchildren = []; - for (var i = 0; i < block.children.length; i++) { - newchildren.push(this.processInlines(block.children[i])); + var node, event; + var walker = block.walker(); + while (event = walker.next()) { + node = event.node; + if (!event.entering && (node.t == 'Paragraph' || node.t == 'Header')) { + this.inlineParser.parse(node, this.refmap); } - newblock.children = newchildren; } - return newblock; }; // The main parsing function. Returns a parsed document AST. @@ -675,7 +635,8 @@ var parse = function(input) { while (this.tip) { this.finalize(this.tip, len - 1); } - return this.processInlines(this.doc); + this.processInlines(this.doc); + return this.doc; }; diff --git a/js/lib/html-renderer.js b/js/lib/html-renderer.js index f6efe32..e7953cf 100644 --- a/js/lib/html-renderer.js +++ b/js/lib/html-renderer.js @@ -1,6 +1,6 @@ -// Helper function to produce content in a pair of HTML tags. -var inTags = function(tag, attribs, contents, selfclosing) { - var result = '<' + tag; +// Helper function to produce an HTML tag. +var tag = function(name, attribs, selfclosing) { + var result = '<' + name; if (attribs) { var i = 0; var attrib; @@ -9,132 +9,207 @@ var inTags = function(tag, attribs, contents, selfclosing) { i++; } } - if (contents) { - result = result.concat('>', contents, '</', tag, '>'); - } else if (selfclosing) { - result = result + ' />'; - } else { - result = result.concat('></', tag, '>'); - } - return result; -}; - -// Render an inline element as HTML. -var renderInline = function(inline) { - var attrs; - switch (inline.t) { - case 'Text': - return this.escape(inline.c); - case 'Softbreak': - return this.softbreak; - case 'Hardbreak': - return inTags('br', [], "", true) + '\n'; - case 'Emph': - return inTags('em', [], this.renderInlines(inline.children)); - case 'Strong': - return inTags('strong', [], this.renderInlines(inline.children)); - case 'Html': - return inline.c; - case 'Link': - attrs = [['href', this.escape(inline.destination, true)]]; - if (inline.title) { - attrs.push(['title', this.escape(inline.title, true)]); - } - return inTags('a', attrs, this.renderInlines(inline.children)); - case 'Image': - attrs = [['src', this.escape(inline.destination, true)], - ['alt', this.renderInlines(inline.children). - replace(/\<[^>]*alt="([^"]*)"[^>]*\>/g, '$1'). - replace(/\<[^>]*\>/g, '')]]; - if (inline.title) { - attrs.push(['title', this.escape(inline.title, true)]); - } - return inTags('img', attrs, "", true); - case 'Code': - return inTags('code', [], this.escape(inline.c)); - default: - console.log("Unknown inline type " + inline.t); - return ""; - } -}; + if (selfclosing) + result += ' /'; -// Render a list of inlines. -var renderInlines = function(inlines) { - var result = ''; - for (var i = 0; i < inlines.length; i++) { - result = result + this.renderInline(inlines[i]); - } + result += '>'; return result; }; -// Render a single block element. -var renderBlock = function(block, in_tight_list) { - var tag; - var attr; +var renderNodes = function(block) { + + var attrs; var info_words; - switch (block.t) { - case 'Document': - var whole_doc = this.renderBlocks(block.children); - return (whole_doc === '' ? '' : whole_doc + '\n'); - case 'Paragraph': - if (in_tight_list) { - return this.renderInlines(block.children); + var tagname; + var walker = block.walker(); + var event, node, entering; + var buffer = []; + var disableTags = 0; + var grandparent; + var out = function(s) { + if (disableTags > 0) { + buffer.push(s.replace(/\<[^>]*\>/g, '')); } else { - return inTags('p', [], this.renderInlines(block.children)); - } - break; - case 'BlockQuote': - var filling = this.renderBlocks(block.children); - return inTags('blockquote', [], filling === '' ? this.innersep : - this.innersep + filling + this.innersep); - case 'ListItem': - var contents = this.renderBlocks(block.children, in_tight_list); - if (/^[<]/.test(contents)) { - contents = '\n' + contents; + buffer.push(s); } - if (/[>]$/.test(contents)) { - contents = contents + '\n'; + } + var esc = this.escape; + var cr = function() { + if (buffer.length > 0 && buffer[buffer.length - 1] !== '\n') { + out('\n'); } - return inTags('li', [], contents, false).trim(); - case 'List': - tag = block.list_data.type === 'Bullet' ? 'ul' : 'ol'; - attr = (!block.list_data.start || block.list_data.start === 1) ? - [] : [['start', block.list_data.start.toString()]]; - return inTags(tag, attr, this.innersep + - this.renderBlocks(block.children, block.tight) + - this.innersep); - case 'Header': - tag = 'h' + block.level; - return inTags(tag, [], this.renderInlines(block.children)); - case 'CodeBlock': - info_words = block.info ? block.info.split(/ +/) : []; - attr = (info_words.length === 0 || info_words[0].length === 0) ? - [] : [['class', 'language-' + this.escape(info_words[0], true)]]; - return inTags('pre', [], - inTags('code', attr, this.escape(block.string_content))); - case 'HtmlBlock': - return block.string_content; - case 'ReferenceDef': - return ""; - case 'HorizontalRule': - return inTags('hr', [], "", true); - default: - console.log("Unknown block type " + block.t); - return ""; } -}; -// Render a list of block elements, separated by this.blocksep. -var renderBlocks = function(blocks, in_tight_list) { - var result = []; - for (var i = 0; i < blocks.length; i++) { - if (blocks[i].t !== 'ReferenceDef') { - result.push(this.renderBlock(blocks[i], in_tight_list)); + while (event = walker.next()) { + entering = event.entering; + node = event.node; + + switch (node.t) { + case 'Text': + out(esc(node.c)); + break; + + case 'Softbreak': + out(this.softbreak); + break; + + case 'Hardbreak': + out(tag('br', [], true)); + cr(); + break; + + case 'Emph': + out(tag(entering ? 'em' : '/em')); + break; + + case 'Strong': + out(tag(entering ? 'strong' : '/strong')); + break; + + case 'Emph': + out(tag(entering ? 'strong' : '/strong')); + break; + + case 'Html': + out(node.c); + break; + + case 'Link': + if (entering) { + attrs = [['href', esc(node.destination, true)]]; + if (node.title) { + attrs.push(['title', esc(node.title, true)]); + } + out(tag('a', attrs)); + } else { + out(tag('/a')); + } + break; + + case 'Image': + if (entering) { + if (disableTags == 0) { + out('<img src="' + esc(node.destination, true) + + '" alt="'); + } + disableTags += 1; + } else { + disableTags -= 1; + if (disableTags == 0) { + if (node.title) { + out('" title="' + esc(node.title, true)); + } + out('" />'); + } + } + break; + + case 'Code': + out(tag('code') + esc(node.c) + tag('/code')); + break; + + case 'Document': + break; + + case 'Paragraph': + grandparent = node.parent.parent; + if (grandparent !== null && + grandparent.t === 'List') { + if (grandparent.tight) + break; + } + if (entering) { + cr(); + out(tag('p')); + } else { + out(tag('/p')); + cr(); + } + break; + + case 'BlockQuote': + if (entering) { + cr(); + out(tag('blockquote')); + cr(); + } else { + cr(); + out(tag('/blockquote')); + cr(); + } + break; + + case 'ListItem': + if (entering) { + out(tag('li')); + } else { + out(tag('/li')); + cr(); + } + break; + + case 'List': + tagname = node.list_data.type === 'Bullet' ? 'ul' : 'ol'; + if (entering) { + attr = (!node.list_data.start || node.list_data.start === 1) ? + [] : [['start', node.list_data.start.toString()]]; + cr(); + out(tag(tagname, attr)); + cr(); + } else { + cr(); + out(tag('/' + tagname)); + cr(); + } + break; + + case 'Header': + tagname = 'h' + node.level; + if (entering) { + cr(); + out(tag(tagname)); + } else { + out(tag('/' + tagname)); + cr(); + } + break; + + case 'CodeBlock': + info_words = node.info ? node.info.split(/ +/) : []; + attr = (info_words.length === 0 || info_words[0].length === 0) + ? [] : [['class', 'language-' + esc(info_words[0], true)]]; + cr(); + out(tag('pre') + tag('code', attr)); + out(this.escape(node.string_content)); + out(tag('/code') + tag('/pre')); + cr(); + break; + + case 'HtmlBlock': + cr(); + out(node.string_content); + cr(); + break; + + case 'HorizontalRule': + cr(); + out(tag('hr', [], true)); + cr(); + break; + + + case 'ReferenceDef': + break; + + default: + console.log("Unknown node type " + node.t); } + } - return result.join(this.blocksep); + return buffer.join(''); }; + // The HtmlRenderer object. function HtmlRenderer(){ return { @@ -157,11 +232,7 @@ function HtmlRenderer(){ .replace(/["]/g, '"'); } }, - renderInline: renderInline, - renderInlines: renderInlines, - renderBlock: renderBlock, - renderBlocks: renderBlocks, - render: renderBlock + render: renderNodes }; } diff --git a/js/lib/index.js b/js/lib/index.js index 4e04532..b2388ce 100755 --- a/js/lib/index.js +++ b/js/lib/index.js @@ -17,6 +17,7 @@ var renderAST = function(tree) { return util.inspect(tree, {depth: null}); }; +module.exports.Node = require('./node'); module.exports.DocParser = require('./blocks'); module.exports.HtmlRenderer = require('./html-renderer'); module.exports.ASTRenderer = renderAST; diff --git a/js/lib/inlines.js b/js/lib/inlines.js index ef7a00f..23b2b1e 100644 --- a/js/lib/inlines.js +++ b/js/lib/inlines.js @@ -85,6 +85,12 @@ var normalizeReference = function(s) { .toUpperCase(); }; +var text = function(s) { + var node = new Node('Text'); + node.c = s; + return node; +} + // INLINE PARSER // These are methods of an InlineParser object, defined below. @@ -123,9 +129,9 @@ var spnl = function() { // in the subject. If they succeed in matching anything, they // return the inline matched, advancing the subject. -// Attempt to parse backticks, returning either a backtick code span or a +// Attempt to parse backticks, adding either a backtick code span or a // literal sequence of backticks. -var parseBackticks = function(inlines) { +var parseBackticks = function(block) { var ticks = this.match(/^`+/); if (!ticks) { return 0; @@ -133,37 +139,42 @@ var parseBackticks = function(inlines) { var afterOpenTicks = this.pos; var foundCode = false; var matched; + var node; while (!foundCode && (matched = this.match(/`+/m))) { if (matched === ticks) { - inlines.push({ t: 'Code', c: this.subject.slice(afterOpenTicks, - this.pos - ticks.length) - .replace(/[ \n]+/g, ' ') - .trim() }); + node = new Node('Code'); + node.c = this.subject.slice(afterOpenTicks, + this.pos - ticks.length) + .replace(/[ \n]+/g, ' ') + .trim(); + block.appendChild(node); return true; } } // If we got here, we didn't match a closing backtick sequence. this.pos = afterOpenTicks; - inlines.push({ t: 'Text', c: ticks }); + block.appendChild(text(ticks)); return true; }; // Parse a backslash-escaped special character, adding either the escaped // character, a hard line break (if the backslash is followed by a newline), -// or a literal backslash to the 'inlines' list. -var parseBackslash = function(inlines) { +// or a literal backslash to the block's children. +var parseBackslash = function(block) { var subj = this.subject, pos = this.pos; + var node; if (subj.charCodeAt(pos) === C_BACKSLASH) { if (subj.charAt(pos + 1) === '\n') { this.pos = this.pos + 2; - inlines.push({ t: 'Hardbreak' }); + node = new Node('Hardbreak'); + block.appendChild(node); } else if (reEscapable.test(subj.charAt(pos + 1))) { this.pos = this.pos + 2; - inlines.push({ t: 'Text', c: subj.charAt(pos + 1) }); + block.appendChild(text(subj.charAt(pos + 1))); } else { this.pos++; - inlines.push({t: 'Text', c: '\\'}); + block.appendChild(text('\\')); } return true; } else { @@ -172,22 +183,23 @@ var parseBackslash = function(inlines) { }; // Attempt to parse an autolink (URL or email in pointy brackets). -var parseAutolink = function(inlines) { +var parseAutolink = function(block) { var m; var dest; + var node; if ((m = this.match(/^<([a-zA-Z0-9.!#$%&'*+\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)>/))) { // email autolink dest = m.slice(1, -1); - inlines.push( - {t: 'Link', - children: [{ t: 'Text', c: dest }], - destination: 'mailto:' + encodeURI(unescape(dest)) }); + node = new Node('Link'); + node.destination = 'mailto:' + encodeURI(unescape(dest)); + node.appendChild(text(dest)); + block.appendChild(node); return true; } else if ((m = this.match(/^<(?:coap|doi|javascript|aaa|aaas|about|acap|cap|cid|crid|data|dav|dict|dns|file|ftp|geo|go|gopher|h323|http|https|iax|icap|im|imap|info|ipp|iris|iris.beep|iris.xpc|iris.xpcs|iris.lwz|ldap|mailto|mid|msrp|msrps|mtqp|mupdate|news|nfs|ni|nih|nntp|opaquelocktoken|pop|pres|rtsp|service|session|shttp|sieve|sip|sips|sms|snmp|soap.beep|soap.beeps|tag|tel|telnet|tftp|thismessage|tn3270|tip|tv|urn|vemmi|ws|wss|xcon|xcon-userid|xmlrpc.beep|xmlrpc.beeps|xmpp|z39.50r|z39.50s|adiumxtra|afp|afs|aim|apt|attachment|aw|beshare|bitcoin|bolo|callto|chrome|chrome-extension|com-eventbrite-attendee|content|cvs|dlna-playsingle|dlna-playcontainer|dtn|dvb|ed2k|facetime|feed|finger|fish|gg|git|gizmoproject|gtalk|hcp|icon|ipn|irc|irc6|ircs|itms|jar|jms|keyparc|lastfm|ldaps|magnet|maps|market|message|mms|ms-help|msnim|mumble|mvn|notes|oid|palm|paparazzi|platform|proxy|psyc|query|res|resource|rmi|rsync|rtmp|secondlife|sftp|sgn|skype|smb|soldat|spotify|ssh|steam|svn|teamspeak|things|udp|unreal|ut2004|ventrilo|view-source|webcal|wtai|wyciwyg|xfire|xri|ymsgr):[^<>\x00-\x20]*>/i))) { dest = m.slice(1, -1); - inlines.push({ - t: 'Link', - children: [{ t: 'Text', c: dest }], - destination: encodeURI(unescape(dest)) }); + node = new Node('Link'); + node.destination = encodeURI(unescape(dest)); + node.appendChild(text(dest)); + block.appendChild(node); return true; } else { return false; @@ -195,10 +207,13 @@ var parseAutolink = function(inlines) { }; // Attempt to parse a raw HTML tag. -var parseHtmlTag = function(inlines) { +var parseHtmlTag = function(block) { var m = this.match(reHtmlTag); + var node; if (m) { - inlines.push({ t: 'Html', c: m }); + node = new Node('Html'); + node.c = m; + block.appendChild(node); return true; } else { return false; @@ -247,20 +262,8 @@ var scanDelims = function(cc) { can_close: can_close }; }; -var Emph = function(ils) { - return {t: 'Emph', children: ils}; -}; - -var Strong = function(ils) { - return {t: 'Strong', children: ils}; -}; - -var Str = function(s) { - return {t: 'Text', c: s}; -}; - // Attempt to parse emphasis or strong emphasis. -var parseEmphasis = function(cc, inlines) { +var parseEmphasis = function(cc, block) { var res = this.scanDelims(cc); var numdelims = res.numdelims; @@ -271,12 +274,13 @@ var parseEmphasis = function(cc, inlines) { } this.pos += numdelims; - inlines.push(Str(this.subject.slice(startpos, this.pos))); + var node = text(this.subject.slice(startpos, this.pos)); + block.appendChild(node); // Add entry to stack for this opener this.delimiters = { cc: cc, numdelims: numdelims, - pos: inlines.length - 1, + node: node, previous: this.delimiters, next: null, can_open: res.can_open, @@ -302,28 +306,14 @@ var removeDelimiter = function(delim) { } }; -var removeGaps = function(inlines) { - // remove gaps from inlines - var i, j; - j = 0; - for (i = 0 ; i < inlines.length; i++) { - if (inlines[i] !== null) { - inlines[j] = inlines[i]; - j++; - } - } - inlines.splice(j); -}; - -var processEmphasis = function(inlines, stack_bottom) { +var processEmphasis = function(block, stack_bottom) { "use strict"; var opener, closer; var opener_inl, closer_inl; var nextstack, tempstack; var use_delims; var contents; - var emph; - var i; + var tmp, next; // find first closer above stack_bottom: closer = this.delimiters; @@ -350,8 +340,8 @@ var processEmphasis = function(inlines, stack_bottom) { use_delims = closer.numdelims % 2 === 0 ? 2 : 1; } - opener_inl = inlines[opener.pos]; - closer_inl = inlines[closer.pos]; + opener_inl = opener.node; + closer_inl = closer.node; // remove used delimiters from stack elts and inlines opener.numdelims -= use_delims; @@ -360,17 +350,18 @@ var processEmphasis = function(inlines, stack_bottom) { closer_inl.c = closer_inl.c.slice(0, closer_inl.c.length - use_delims); // build contents for new emph element - contents = inlines.slice(opener.pos + 1, closer.pos); - removeGaps(contents); - - emph = use_delims === 1 ? Emph(contents) : Strong(contents); - - // insert into list of inlines - inlines[opener.pos + 1] = emph; - for (i = opener.pos + 2; i < closer.pos; i++) { - inlines[i] = null; + var emph = new Node(use_delims === 1 ? 'Emph' : 'Strong'); + + tmp = opener_inl.next; + while (tmp && tmp !== closer_inl) { + next = tmp.next; + tmp.unlink(); + emph.appendChild(tmp); + tmp = next; } + opener_inl.insertAfter(emph); + // remove elts btw opener and closer in delimiters stack tempstack = closer.previous; while (tempstack !== null && tempstack !== opener) { @@ -381,18 +372,17 @@ var processEmphasis = function(inlines, stack_bottom) { // if opener has 0 delims, remove it and the inline if (opener.numdelims === 0) { - inlines[opener.pos] = null; + opener_inl.unlink(); this.removeDelimiter(opener); } if (closer.numdelims === 0) { - inlines[closer.pos] = null; + closer_inl.unlink(); tempstack = closer.next; this.removeDelimiter(closer); closer = tempstack; } - } else { closer = closer.next; } @@ -403,8 +393,6 @@ var processEmphasis = function(inlines, stack_bottom) { } - removeGaps(inlines); - // remove all delimiters while (this.delimiters !== stack_bottom) { this.removeDelimiter(this.delimiters); @@ -445,18 +433,19 @@ var parseLinkLabel = function() { return m === null ? 0 : m.length; }; -// Add open bracket to delimiter stack and add a Str to inlines. -var parseOpenBracket = function(inlines) { +// Add open bracket to delimiter stack and add a text node to block's children. +var parseOpenBracket = function(block) { var startpos = this.pos; this.pos += 1; - inlines.push(Str("[")); + var node = text('['); + block.appendChild(node); // Add entry to stack for this opener this.delimiters = { cc: C_OPEN_BRACKET, numdelims: 1, - pos: inlines.length - 1, + node: node, previous: this.delimiters, next: null, can_open: true, @@ -472,19 +461,21 @@ var parseOpenBracket = function(inlines) { }; // IF next character is [, and ! delimiter to delimiter stack and -// add a Str to inlines. Otherwise just add a Str. -var parseBang = function(inlines) { +// add a text node to block's children. Otherwise just add a text node. +var parseBang = function(block) { var startpos = this.pos; this.pos += 1; if (this.peek() === C_OPEN_BRACKET) { this.pos += 1; - inlines.push(Str("![")); + + var node = text('!['); + block.appendChild(node); // Add entry to stack for this opener this.delimiters = { cc: C_BANG, numdelims: 1, - pos: inlines.length - 1, + node: node, previous: this.delimiters, next: null, can_open: true, @@ -495,22 +486,21 @@ var parseBang = function(inlines) { this.delimiters.previous.next = this.delimiters; } } else { - inlines.push(Str("!")); + block.appendChild(text('!')); } return true; }; // Try to match close bracket against an opening in the delimiter // stack. Add either a link or image, or a plain [ character, -// to the inlines stack. If there is a matching delimiter, +// to block's children. If there is a matching delimiter, // remove it from the delimiter stack. -var parseCloseBracket = function(inlines) { +var parseCloseBracket = function(block) { var startpos; var is_image; var dest; var title; var matched = false; - var link_text; var i; var reflabel; var opener; @@ -530,13 +520,13 @@ var parseCloseBracket = function(inlines) { if (opener === null) { // no matched opener, just return a literal - inlines.push(Str("]")); + block.appendChild(text(']')); return true; } if (!opener.active) { // no matched opener, just return a literal - inlines.push(Str("]")); + block.appendChild(text(']')); // take opener off emphasis stack this.removeDelimiter(opener); return true; @@ -544,15 +534,6 @@ var parseCloseBracket = function(inlines) { // If we got here, open is a potential opener is_image = opener.cc === C_BANG; - // instead of copying a slice, we null out the - // parts of inlines that don't correspond to link_text; - // later, we'll collapse them. This is awkward, and could - // be simplified if we made inlines a linked list rather than - // an array: - link_text = inlines.slice(0); - for (i = 0; i < opener.pos + 1; i++) { - link_text[i] = null; - } // Check to see if we have a link/image @@ -597,13 +578,22 @@ var parseCloseBracket = function(inlines) { } if (matched) { - this.processEmphasis(link_text, opener.previous); - - // remove the part of inlines that became link_text. - // see note above on why we need to do this instead of splice: - for (i = opener.pos; i < inlines.length; i++) { - inlines[i] = null; + var node = new Node(is_image ? 'Image' : 'Link'); + node.destination = dest; + node.title = title; + + var tmp, next; + tmp = opener.node.next; + while (tmp) { + next = tmp.next; + tmp.unlink(); + node.appendChild(tmp); + tmp = next; } + block.appendChild(node); + this.processEmphasis(node, opener.previous); + + opener.node.unlink(); // processEmphasis will remove this and later delimiters. // Now, for a link, we also deactivate earlier link openers. @@ -618,27 +608,23 @@ var parseCloseBracket = function(inlines) { } } - inlines.push({t: is_image ? 'Image' : 'Link', - destination: dest, - title: title, - children: link_text}); return true; } else { // no match this.removeDelimiter(opener); // remove this opener from stack this.pos = startpos; - inlines.push(Str("]")); + block.appendChild(text(']')); return true; } }; // Attempt to parse an entity, return Entity object if successful. -var parseEntity = function(inlines) { +var parseEntity = function(block) { var m; if ((m = this.match(reEntityHere))) { - inlines.push({ t: 'Text', c: entityToChar(m) }); + block.appendChild(text(entityToChar(m))); return true; } else { return false; @@ -646,11 +632,11 @@ var parseEntity = function(inlines) { }; // Parse a run of ordinary characters, or a single character with -// a special meaning in markdown, as a plain string, adding to inlines. -var parseString = function(inlines) { +// a special meaning in markdown, as a plain string. +var parseString = function(block) { var m; if ((m = this.match(reMain))) { - inlines.push({ t: 'Text', c: m }); + block.appendChild(text(m)); return true; } else { return false; @@ -659,17 +645,15 @@ var parseString = function(inlines) { // Parse a newline. If it was preceded by two spaces, return a hard // line break; otherwise a soft line break. -var parseNewline = function(inlines) { +var parseNewline = function(block) { var m = this.match(/^ *\n/); if (m) { - if (m.length > 2) { - inlines.push({ t: 'Hardbreak' }); - } else if (m.length > 0) { - inlines.push({ t: 'Softbreak' }); - } + var node = new Node(m.length > 2 ? 'Hardbreak' : 'Softbreak'); + block.appendChild(node); return true; + } else { + return false; } - return false; }; // Attempt to parse a link reference, modifying refmap. @@ -731,9 +715,9 @@ var parseReference = function(s, refmap) { }; // Parse the next inline element in subject, advancing subject position. -// On success, add the result to the inlines list, and return true. +// On success, add the result to block's children and return true. // On failure, return false. -var parseInline = function(inlines) { +var parseInline = function(block) { "use strict"; var c = this.peek(); if (c === -1) { @@ -743,56 +727,57 @@ var parseInline = function(inlines) { switch(c) { case C_NEWLINE: case C_SPACE: - res = this.parseNewline(inlines); + res = this.parseNewline(block); break; case C_BACKSLASH: - res = this.parseBackslash(inlines); + res = this.parseBackslash(block); break; case C_BACKTICK: - res = this.parseBackticks(inlines); + res = this.parseBackticks(block); break; case C_ASTERISK: case C_UNDERSCORE: - res = this.parseEmphasis(c, inlines); + res = this.parseEmphasis(c, block); break; case C_OPEN_BRACKET: - res = this.parseOpenBracket(inlines); + res = this.parseOpenBracket(block); break; case C_BANG: - res = this.parseBang(inlines); + res = this.parseBang(block); break; case C_CLOSE_BRACKET: - res = this.parseCloseBracket(inlines); + res = this.parseCloseBracket(block); break; case C_LESSTHAN: - res = this.parseAutolink(inlines) || this.parseHtmlTag(inlines); + res = this.parseAutolink(block) || this.parseHtmlTag(block); break; case C_AMPERSAND: - res = this.parseEntity(inlines); + res = this.parseEntity(block); break; default: - res = this.parseString(inlines); + res = this.parseString(block); break; } if (!res) { this.pos += 1; - inlines.push({t: 'Text', c: fromCodePoint(c)}); + var textnode = new Node('Text'); + textnode.c = fromCodePoint(c); + block.appendChild(textnode); } return true; }; -// Parse s as a list of inlines, using refmap to resolve references. -var parseInlines = function(s, refmap) { - this.subject = s; +// Parse string_content in block into inline children, +// using refmap to resolve references. +var parseInlines = function(block, refmap) { + this.subject = block.string_content.trim(); this.pos = 0; this.refmap = refmap || {}; this.delimiters = null; - var inlines = []; - while (this.parseInline(inlines)) { + while (this.parseInline(block)) { } - this.processEmphasis(inlines, null); - return inlines; + this.processEmphasis(block, null); }; // The InlineParser object. diff --git a/js/lib/node.js b/js/lib/node.js new file mode 100644 index 0000000..f96d65c --- /dev/null +++ b/js/lib/node.js @@ -0,0 +1,195 @@ +util = require('util'); + +function isContainer(node) { + var t = node.t; + return (t == 'Document' || + t == 'BlockQuote' || + t == 'List' || + t == 'ListItem' || + t == 'Paragraph' || + t == 'Header' || + t == 'Emph' || + t == 'Strong' || + t == 'Link' || + t == 'Image'); +} + +function NodeWalker(root) { + this.current = root; + this.root = root; + this.entering = true; +} + +NodeWalker.prototype.resumeAt = function(node, entering) { + this.current = node; + this.entering = (entering === true); +}; + +NodeWalker.prototype.next = function(){ + var cur = this.current; + var entering = this.entering; + + if (!cur) + return null; + + var container = isContainer(cur); + + if (entering && container) { + if (cur.firstChild) { + this.current = cur.firstChild; + this.entering = true; + } else { + // stay on node but exit + this.entering = false; + } + + } else if (!entering && cur == this.root) { + // don't move past root + this.current = null; + + } else if (cur.next) { + this.current = cur.next; + this.entering = true; + + } else { + this.current = cur.parent; + this.entering = false; + } + + return {entering: entering, node: cur}; +}; + +function Node(nodeType) { + this.t = nodeType; + this.parent = null; + this.firstChild = null; + this.lastChild = null; + this.prev = null; + this.next = null; + this.pos = {}; +} + +Node.prototype.isContainer = function() { + return isContainer(this); +}; + +Node.prototype.appendChild = function(child) { + child.unlink(); + child.parent = this; + if (this.lastChild) { + this.lastChild.next = child; + child.prev = this.lastChild; + this.lastChild = child; + } else { + this.firstChild = child; + this.lastChild = child; + } +}; + +Node.prototype.prependChild = function(child) { + child.unlink(); + child.parent = this; + if (this.firstChild) { + this.firstChild.prev = child; + child.next = this.firstChild; + this.firstChild = child; + } else { + this.firstChild = child; + this.lastChild = child; + } +}; + +Node.prototype.unlink = function() { + if (this.prev) { + this.prev.next = this.next; + } else if (this.parent) { + this.parent.firstChild = this.next; + } + if (this.next) { + this.next.prev = this.prev; + } else if (this.parent) { + this.parent.lastChild = this.prev; + } + this.parent = null; + this.next = null; + this.prev = null; +}; + +Node.prototype.insertAfter = function(sibling) { + sibling.unlink(); + sibling.next = this.next; + if (sibling.next) { + sibling.next.prev = sibling; + } + sibling.prev = this; + this.next = sibling; + sibling.parent = this.parent; + if (!sibling.next) { + sibling.parent.lastChild = sibling; + } +}; + +Node.prototype.insertBefore = function(sibling) { + sibling.unlink(); + sibling.prev = this.prev; + if (sibling.prev) { + sibling.prev.next = sibling; + } + sibling.next = this; + this.prev = sibling; + sibling.parent = this.parent; + if (!sibling.prev) { + sibling.parent.firstChild = sibling; + } +}; + +Node.prototype.walker = function() { + var walker = new NodeWalker(this); + return walker; +}; + +Node.prototype.toAST = function() { + var children; + var cur; + var result = { t: this.t }; + + var propsToShow = ['t', 'c', 'list_data', 'string_content', + 'pos', 'start_column', 'end_column', + 'tight', 'info']; + + for (var i = 0; i < propsToShow.length; i++) { + var prop = propsToShow[i]; + if (this[prop] !== undefined) { + result[prop] = this[prop]; + } + } + + if (isContainer(this)) { + children = []; + if (this.firstChild) { + cur = this.firstChild; + while (cur) { + // TODO avoid recursion here... + children.push(cur.toAST()); + cur = cur.next; + } + result.children = children; + } + } + return result; + +}; + +module.exports = Node; + + +/* Example of use of walker: + + var walker = w.walker(); + var event; + + while (event = walker.next()) { + console.log(event.entering, event.node.t); + } + +*/ @@ -56,7 +56,7 @@ fs.readFile('spec.txt', 'utf8', function(err, data) { current_section = example.section; cursor.reset().write(current_section).reset().write(' '); } - var actual = writer.renderBlock(reader.parse(example.markdown.replace(/→/g, '\t'))); + var actual = writer.render(reader.parse(example.markdown.replace(/→/g, '\t'))); if (actual === example.html) { passed++; cursor.green().write('✓').reset(); |