From d57f3952ca8b9aac16db8243539f4c1c5dbf3c93 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 27 Dec 2014 21:51:30 -0800 Subject: Added xml writer, to dump the AST in XML. This is a work-in-progress. CommonMark.dtd gives the DTD for the generated XML. Closes #53. --- CommonMark.dtd | 45 +++++++++++++++++ src/CMakeLists.txt | 1 + src/cmark.h | 5 ++ src/main.c | 8 ++- src/xml.c | 140 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 198 insertions(+), 1 deletion(-) create mode 100644 CommonMark.dtd create mode 100644 src/xml.c diff --git a/CommonMark.dtd b/CommonMark.dtd new file mode 100644 index 0000000..4ad924b --- /dev/null +++ b/CommonMark.dtd @@ -0,0 +1,45 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8eb198e..ef26bef 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -29,6 +29,7 @@ set(LIBRARY_SOURCES buffer.c references.c man.c + xml.c html.c html_unescape.gperf houdini_href_e.c diff --git a/src/cmark.h b/src/cmark.h index 3b60d67..1dab0dd 100644 --- a/src/cmark.h +++ b/src/cmark.h @@ -394,6 +394,11 @@ cmark_node *cmark_parse_file(FILE *f); CMARK_EXPORT char *cmark_render_ast(cmark_node *root); +/** Render a 'node' tree as XML. + */ +CMARK_EXPORT +char *cmark_render_xml(cmark_node *root); + /** Render a 'node' tree as an HTML fragment. It is up to the user * to add an appropriate header and footer. 
*/ diff --git a/src/main.c b/src/main.c index be3d305..be1bfd9 100644 --- a/src/main.c +++ b/src/main.c @@ -10,6 +10,7 @@ typedef enum { FORMAT_NONE, FORMAT_HTML, + FORMAT_XML, FORMAT_MAN, FORMAT_AST } writer_format; @@ -18,7 +19,7 @@ void print_usage() { printf("Usage: cmark [FILE*]\n"); printf("Options:\n"); - printf(" --to, -t FORMAT Specify output format (html, man, ast)\n"); + printf(" --to, -t FORMAT Specify output format (html, xml, man, ast)\n"); printf(" --help, -h Print usage information\n"); printf(" --version Print version\n"); } @@ -33,6 +34,9 @@ static void print_document(cmark_node *document, writer_format writer) case FORMAT_HTML: result = cmark_render_html(document); break; + case FORMAT_XML: + result = cmark_render_xml(document); + break; case FORMAT_MAN: result = cmark_render_man(document); break; @@ -74,6 +78,8 @@ int main(int argc, char *argv[]) writer = FORMAT_MAN; } else if (strcmp(argv[i], "html") == 0) { writer = FORMAT_HTML; + } else if (strcmp(argv[i], "xml") == 0) { + writer = FORMAT_XML; } else if (strcmp(argv[i], "ast") == 0) { writer = FORMAT_AST; } else { diff --git a/src/xml.c b/src/xml.c new file mode 100644 index 0000000..86fb6d4 --- /dev/null +++ b/src/xml.c @@ -0,0 +1,140 @@ +#include +#include +#include +#include + +#include "config.h" +#include "cmark.h" +#include "node.h" +#include "buffer.h" +#include "houdini.h" + +// Functions to convert cmark_nodes to XML strings. 
+/* escape_xml: append source to dest through houdini_escape_html0 (last argument 0). A negative length means the byte count is taken with strlen, so source must then be NUL-terminated; a NULL source appends nothing. */ +static void escape_xml(cmark_strbuf *dest, const unsigned char *source, int length) +{ + if (source != NULL) { + if (length < 0) + length = strlen((char *)source); + + houdini_escape_html0(dest, source, (size_t)length, 0); + } +} +/* State threaded through S_render_node: xml is the output buffer; indent is the number of space characters that indent() writes before a tag. */ +struct render_state { + cmark_strbuf* xml; + int indent; +}; +/* Write state->indent space characters to state->xml. */ +static inline void indent(struct render_state *state) +{ + int i; + for (i = 0; i < state->indent; i++) { + cmark_strbuf_putc(state->xml, ' '); + } +} +/* S_render_node: render one iterator event for node into state->xml. On CMARK_EVENT_ENTER it writes the indentation, an opening tag named by cmark_node_type_string, and a sourcepos attribute when start_line is nonzero; state->indent grows by 2 when the node has a first child and shrinks by 2 again in the non-enter branch. The function ends with return 1. NOTE(review): several string literals inside the switch and in the non-enter branch are truncated in this copy of the patch (text that looked like markup seems to have been stripped) - compare with the upstream file before relying on those details. cmark_render_xml, further down, calls this function for every event from cmark_iter_next until CMARK_EVENT_DONE and returns the buffer contents obtained from cmark_strbuf_detach. */ +static int +S_render_node(cmark_node *node, cmark_event_type ev_type, void *vstate) +{ + struct render_state *state = vstate; + cmark_strbuf *xml = state->xml; + bool literal = false; + + bool entering = (ev_type == CMARK_EVENT_ENTER); + + if (entering) { + indent(state); + cmark_strbuf_printf(xml, "<%s", cmark_node_type_string(node)); + + if (node->start_line != 0) { + cmark_strbuf_printf(xml, " sourcepos=\"%d:%d-%d\"", + node->start_line, + node->start_column, + node->end_line); + } + + literal = false; + + switch (node->type) { + case CMARK_NODE_TEXT: + case CMARK_NODE_CODE: + case CMARK_NODE_HTML: + case CMARK_NODE_INLINE_HTML: + cmark_strbuf_puts(xml, ">"); + escape_xml(xml, node->as.literal.data, + node->as.literal.len); + cmark_strbuf_puts(xml, "as.code.info.len > 0) { + cmark_strbuf_puts(xml, " info=\""); + escape_xml(xml, node->as.code.info.data, + node->as.code.info.len); + cmark_strbuf_putc(xml, '"'); + } + cmark_strbuf_puts(xml, ">"); + escape_xml(xml, node->as.code.literal.data, + node->as.code.literal.len); + cmark_strbuf_puts(xml, "as.link.url, -1); + cmark_strbuf_putc(xml, '"'); + cmark_strbuf_puts(xml, " title=\""); + escape_xml(xml, node->as.link.title, -1); + cmark_strbuf_putc(xml, '"'); + break; + default: + break; + } + if (node->first_child) { + state->indent += 2; + } else if (!literal) { + cmark_strbuf_puts(xml, " /"); + } + + } else { + if (node->first_child) { + state->indent -= 2; + } + indent(state); + cmark_strbuf_printf(xml, "\n"); + + return 1; +} + +char *cmark_render_xml(cmark_node 
*root) +{ + char *result; + cmark_strbuf xml = GH_BUF_INIT; + cmark_event_type ev_type; + cmark_node *cur; + struct render_state state = { &xml, 0 }; + cmark_iter *iter = cmark_iter_new(root); + + cmark_strbuf_puts(state.xml, + "\n"); + cmark_strbuf_puts(state.xml, + "\n"); + while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { + cur = cmark_iter_get_node(iter); + S_render_node(cur, ev_type, &state); + } + result = (char *)cmark_strbuf_detach(&xml); + + cmark_iter_free(iter); + cmark_strbuf_free(&xml); + return result; +} -- cgit v1.2.3