aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author John MacFarlane <jgm@berkeley.edu> 2014-12-27 21:51:30 -0800
committer John MacFarlane <jgm@berkeley.edu> 2014-12-28 16:27:00 -0800
commit d57f3952ca8b9aac16db8243539f4c1c5dbf3c93 (patch)
tree 21ff4ae66cc5d6130963172df2badb3a77a4930e
parent bf44064d09afd04039058a00c32c1532fb5e2b61 (diff)
Added xml writer, to dump the AST in XML.
This is a work-in-progress. CommonMark.dtd gives the DTD for the generated XML. Closes #53.
-rw-r--r-- CommonMark.dtd 45
-rw-r--r-- src/CMakeLists.txt 1
-rw-r--r-- src/cmark.h 5
-rw-r--r-- src/main.c 8
-rw-r--r-- src/xml.c 140
5 files changed, 198 insertions, 1 deletions
diff --git a/CommonMark.dtd b/CommonMark.dtd
new file mode 100644
index 0000000..4ad924b
--- /dev/null
+++ b/CommonMark.dtd
@@ -0,0 +1,45 @@
+<!-- DTD for CommonMark xml export format -->
+
+<!-- Block-level containers: each holds any sequence of block elements. -->
+<!ELEMENT DOCUMENT
+(BLOCK_QUOTE|LIST|CODE_BLOCK|HTML|PARAGRAPH|HEADER|HRULE)*>
+<!ELEMENT BLOCK_QUOTE
+(BLOCK_QUOTE|LIST|CODE_BLOCK|HTML|PARAGRAPH|HEADER|HRULE)*>
+<!ELEMENT LIST (LIST_ITEM)+>
+<!ATTLIST LIST
+  type (bullet|ordered) #REQUIRED
+  start CDATA #IMPLIED
+  tight (true|false) #REQUIRED
+  delimiter (period|paren) #IMPLIED>
+<!ELEMENT LIST_ITEM
+(BLOCK_QUOTE|LIST|CODE_BLOCK|HTML|PARAGRAPH|HEADER|HRULE)*>
+
+<!-- Block-level leaves.  Literal text is carried as character data. -->
+<!-- NOTE(review): the XML specification expects xml:space to be declared
+     as an enumerated type rather than CDATA; left unchanged here. -->
+<!ELEMENT CODE_BLOCK (#PCDATA)>
+<!ATTLIST CODE_BLOCK
+  xml:space CDATA #FIXED "preserve"
+  info CDATA #IMPLIED>
+<!ELEMENT HTML (#PCDATA)>
+<!ATTLIST HTML xml:space CDATA #FIXED "preserve">
+<!ELEMENT PARAGRAPH
+(TEXT|SOFTBREAK|LINEBREAK|CODE|INLINE_HTML|EMPH|STRONG|LINK|IMAGE)*>
+<!ELEMENT HEADER
+(TEXT|SOFTBREAK|LINEBREAK|CODE|INLINE_HTML|EMPH|STRONG|LINK|IMAGE)*>
+<!ATTLIST HEADER level (1|2|3|4|5|6) #REQUIRED>
+<!ELEMENT HRULE EMPTY>
+
+<!-- Inline elements. -->
+<!ELEMENT TEXT (#PCDATA)>
+<!ELEMENT SOFTBREAK EMPTY>
+<!ELEMENT LINEBREAK EMPTY>
+<!ELEMENT CODE (#PCDATA)>
+<!ATTLIST CODE xml:space CDATA #FIXED "preserve">
+<!ELEMENT INLINE_HTML (#PCDATA)>
+<!ATTLIST INLINE_HTML xml:space CDATA #FIXED "preserve">
+<!ELEMENT EMPH
+(TEXT|SOFTBREAK|LINEBREAK|CODE|INLINE_HTML|EMPH|STRONG|LINK|IMAGE)*>
+<!ELEMENT STRONG
+(TEXT|SOFTBREAK|LINEBREAK|CODE|INLINE_HTML|EMPH|STRONG|LINK|IMAGE)*>
+<!ELEMENT LINK
+(TEXT|SOFTBREAK|LINEBREAK|CODE|INLINE_HTML|EMPH|STRONG|LINK|IMAGE)*>
+<!ATTLIST LINK url CDATA #REQUIRED>
+<!ATTLIST LINK title CDATA #IMPLIED>
+<!ELEMENT IMAGE
+(TEXT|SOFTBREAK|LINEBREAK|CODE|INLINE_HTML|EMPH|STRONG|LINK|IMAGE)*>
+<!ATTLIST IMAGE url CDATA #REQUIRED>
+<!ATTLIST IMAGE title CDATA #IMPLIED>
+
+<!-- sourcepos may appear on every element.  An ATTLIST names exactly one
+     element type ("ANY" is not a wildcard there), so the attribute is
+     declared once per element; multiple ATTLISTs for one element merge. -->
+<!ATTLIST DOCUMENT sourcepos CDATA #IMPLIED>
+<!ATTLIST BLOCK_QUOTE sourcepos CDATA #IMPLIED>
+<!ATTLIST LIST sourcepos CDATA #IMPLIED>
+<!ATTLIST LIST_ITEM sourcepos CDATA #IMPLIED>
+<!ATTLIST CODE_BLOCK sourcepos CDATA #IMPLIED>
+<!ATTLIST HTML sourcepos CDATA #IMPLIED>
+<!ATTLIST PARAGRAPH sourcepos CDATA #IMPLIED>
+<!ATTLIST HEADER sourcepos CDATA #IMPLIED>
+<!ATTLIST HRULE sourcepos CDATA #IMPLIED>
+<!ATTLIST TEXT sourcepos CDATA #IMPLIED>
+<!ATTLIST SOFTBREAK sourcepos CDATA #IMPLIED>
+<!ATTLIST LINEBREAK sourcepos CDATA #IMPLIED>
+<!ATTLIST CODE sourcepos CDATA #IMPLIED>
+<!ATTLIST INLINE_HTML sourcepos CDATA #IMPLIED>
+<!ATTLIST EMPH sourcepos CDATA #IMPLIED>
+<!ATTLIST STRONG sourcepos CDATA #IMPLIED>
+<!ATTLIST LINK sourcepos CDATA #IMPLIED>
+<!ATTLIST IMAGE sourcepos CDATA #IMPLIED>
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 8eb198e..ef26bef 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -29,6 +29,7 @@ set(LIBRARY_SOURCES
buffer.c
references.c
man.c
+ xml.c
html.c
html_unescape.gperf
houdini_href_e.c
diff --git a/src/cmark.h b/src/cmark.h
index 3b60d67..1dab0dd 100644
--- a/src/cmark.h
+++ b/src/cmark.h
@@ -394,6 +394,11 @@ cmark_node *cmark_parse_file(FILE *f);
CMARK_EXPORT
char *cmark_render_ast(cmark_node *root);
+/** Render a 'node' tree as XML. The output begins with an XML
+ * declaration and a DOCTYPE that refers to 'CommonMark.dtd', followed
+ * by one element per node. The string is returned detached from the
+ * internal buffer; presumably the caller is responsible for freeing
+ * it (TODO confirm against the other render functions).
+ */
+CMARK_EXPORT
+char *cmark_render_xml(cmark_node *root);
+
/** Render a 'node' tree as an HTML fragment. It is up to the user
* to add an appropriate header and footer.
*/
diff --git a/src/main.c b/src/main.c
index be3d305..be1bfd9 100644
--- a/src/main.c
+++ b/src/main.c
@@ -10,6 +10,7 @@
typedef enum {
FORMAT_NONE,
FORMAT_HTML,
+ FORMAT_XML,
FORMAT_MAN,
FORMAT_AST
} writer_format;
@@ -18,7 +19,7 @@ void print_usage()
{
printf("Usage: cmark [FILE*]\n");
printf("Options:\n");
- printf(" --to, -t FORMAT Specify output format (html, man, ast)\n");
+ printf(" --to, -t FORMAT Specify output format (html, xml, man, ast)\n");
printf(" --help, -h Print usage information\n");
printf(" --version Print version\n");
}
@@ -33,6 +34,9 @@ static void print_document(cmark_node *document, writer_format writer)
case FORMAT_HTML:
result = cmark_render_html(document);
break;
+ case FORMAT_XML:
+ result = cmark_render_xml(document);
+ break;
case FORMAT_MAN:
result = cmark_render_man(document);
break;
@@ -74,6 +78,8 @@ int main(int argc, char *argv[])
writer = FORMAT_MAN;
} else if (strcmp(argv[i], "html") == 0) {
writer = FORMAT_HTML;
+ } else if (strcmp(argv[i], "xml") == 0) {
+ writer = FORMAT_XML;
} else if (strcmp(argv[i], "ast") == 0) {
writer = FORMAT_AST;
} else {
diff --git a/src/xml.c b/src/xml.c
new file mode 100644
index 0000000..86fb6d4
--- /dev/null
+++ b/src/xml.c
@@ -0,0 +1,140 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "config.h"
+#include "cmark.h"
+#include "node.h"
+#include "buffer.h"
+#include "houdini.h"
+
+// Functions to convert cmark_nodes to XML strings.
+
+/*
+ * Append `source` to `dest`, escaped by houdini_escape_html0 (called with
+ * its last argument set to 0).
+ *
+ * dest:   buffer that receives the escaped bytes.
+ * source: bytes to escape; when NULL nothing is appended.
+ * length: number of bytes to read from `source`; a negative value means
+ *         `source` is NUL-terminated and its length is taken with strlen.
+ */
+static void escape_xml(cmark_strbuf *dest, const unsigned char *source, int length)
+{
+    int nbytes;
+
+    /* Nothing to escape. */
+    if (source == NULL) {
+        return;
+    }
+
+    nbytes = (length >= 0) ? length : (int)strlen((char *)source);
+    houdini_escape_html0(dest, source, (size_t)nbytes, 0);
+}
+
+/*
+ * State carried across S_render_node calls during one rendering pass.
+ * Field order matters: cmark_render_xml initializes it positionally.
+ */
+struct render_state {
+    cmark_strbuf *xml;  /* output buffer the XML text is appended to */
+    int indent;         /* number of spaces written before each tag */
+};
+
+/*
+ * Append state->indent space characters to the output buffer.
+ * Writes nothing when the indent is zero or negative.
+ */
+static inline void indent(struct render_state *state)
+{
+    int remaining = state->indent;
+
+    while (remaining-- > 0) {
+        cmark_strbuf_putc(state->xml, ' ');
+    }
+}
+
+/*
+ * Append the XML for one traversal event on `node` to the buffer held by
+ * the render state passed through `vstate` (a struct render_state *).
+ *
+ * On CMARK_EVENT_ENTER the indented start tag is written, with a
+ * sourcepos attribute when start_line is non-zero.  TEXT, CODE, HTML,
+ * INLINE_HTML and CODE_BLOCK nodes are written complete on one line:
+ * start tag, escaped literal content, end tag.  LINK and IMAGE get url
+ * and title attributes.  A node with children raises the indent by two;
+ * any other non-literal node is written as an empty-element tag
+ * ("<NAME />").
+ *
+ * On any other event the indented end tag is written, but only for a
+ * node that has children.  A childless node was already closed when it
+ * was entered, so writing "</NAME>" again would make the output
+ * ill-formed.
+ *
+ * Always returns 1.
+ */
+static int
+S_render_node(cmark_node *node, cmark_event_type ev_type, void *vstate)
+{
+    struct render_state *state = vstate;
+    cmark_strbuf *xml = state->xml;
+    bool literal = false;
+
+    if (ev_type != CMARK_EVENT_ENTER) {
+        /* Childless nodes were fully emitted on entry. */
+        if (node->first_child == NULL) {
+            return 1;
+        }
+
+        /* Undo the indent added when this node was entered. */
+        state->indent -= 2;
+        indent(state);
+        cmark_strbuf_printf(xml, "</%s", cmark_node_type_string(node));
+        cmark_strbuf_puts(xml, ">\n");
+        return 1;
+    }
+
+    indent(state);
+    cmark_strbuf_printf(xml, "<%s", cmark_node_type_string(node));
+
+    if (node->start_line != 0) {
+        /* NOTE(review): only start line, start column and end line are
+         * printed; no end column is emitted.  Confirm this is intended. */
+        cmark_strbuf_printf(xml, " sourcepos=\"%d:%d-%d\"",
+                            node->start_line,
+                            node->start_column,
+                            node->end_line);
+    }
+
+    switch (node->type) {
+    case CMARK_NODE_TEXT:
+    case CMARK_NODE_CODE:
+    case CMARK_NODE_HTML:
+    case CMARK_NODE_INLINE_HTML:
+        cmark_strbuf_puts(xml, ">");
+        escape_xml(xml, node->as.literal.data,
+                   node->as.literal.len);
+        /* The closing '>' is supplied by the shared tail below. */
+        cmark_strbuf_puts(xml, "</");
+        cmark_strbuf_puts(xml, cmark_node_type_string(node));
+        literal = true;
+        break;
+    case CMARK_NODE_CODE_BLOCK:
+        if (node->as.code.info.len > 0) {
+            cmark_strbuf_puts(xml, " info=\"");
+            escape_xml(xml, node->as.code.info.data,
+                       node->as.code.info.len);
+            cmark_strbuf_putc(xml, '"');
+        }
+        cmark_strbuf_puts(xml, ">");
+        escape_xml(xml, node->as.code.literal.data,
+                   node->as.code.literal.len);
+        cmark_strbuf_puts(xml, "</");
+        cmark_strbuf_puts(xml, cmark_node_type_string(node));
+        literal = true;
+        break;
+    case CMARK_NODE_LINK:
+    case CMARK_NODE_IMAGE:
+        /* escape_xml appends nothing for a NULL string, so a missing
+         * url or title is written as an empty attribute value. */
+        cmark_strbuf_puts(xml, " url=\"");
+        escape_xml(xml, node->as.link.url, -1);
+        cmark_strbuf_putc(xml, '"');
+        cmark_strbuf_puts(xml, " title=\"");
+        escape_xml(xml, node->as.link.title, -1);
+        cmark_strbuf_putc(xml, '"');
+        break;
+    default:
+        break;
+    }
+
+    // TODO print the remaining attributes: CommonMark.dtd marks LIST
+    // type/tight and HEADER level as required, and none are emitted yet.
+
+    if (node->first_child) {
+        state->indent += 2;
+    } else if (!literal) {
+        cmark_strbuf_puts(xml, " /");
+    }
+
+    cmark_strbuf_puts(xml, ">\n");
+
+    return 1;
+}
+
+/*
+ * Render the tree rooted at `root` as an XML document.
+ *
+ * The output is an XML declaration, a DOCTYPE that refers to
+ * "CommonMark.dtd", and then one element per node, produced by walking
+ * the tree with a cmark_iter and passing every event to S_render_node.
+ *
+ * Returns the string detached from the internal buffer; presumably the
+ * caller must free it (TODO confirm against cmark_strbuf_detach).
+ */
+char *cmark_render_xml(cmark_node *root)
+{
+    char *result;
+    cmark_strbuf xml = GH_BUF_INIT;
+    cmark_event_type ev_type;
+    cmark_node *cur;
+    struct render_state state = { &xml, 0 };
+    cmark_iter *iter = cmark_iter_new(root);
+
+    cmark_strbuf_puts(state.xml,
+                      "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
+    /* For the document to be valid, the DOCTYPE name has to equal the
+     * type of the root element, so it is taken from the node that is
+     * rendered first rather than hard-coded. */
+    cmark_strbuf_printf(state.xml,
+                        "<!DOCTYPE %s SYSTEM \"CommonMark.dtd\">\n",
+                        cmark_node_type_string(root));
+
+    while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
+        cur = cmark_iter_get_node(iter);
+        S_render_node(cur, ev_type, &state);
+    }
+    result = (char *)cmark_strbuf_detach(&xml);
+
+    cmark_iter_free(iter);
+    /* Kept from the original; presumably a no-op once the contents
+     * have been detached. */
+    cmark_strbuf_free(&xml);
+    return result;
+}