diff options
-rw-r--r-- | man/man1/cmark.1 | 7 | ||||
-rw-r--r-- | man/man3/cmark.3 | 4 | ||||
-rw-r--r-- | src/CMakeLists.txt | 1 | ||||
-rw-r--r-- | src/cmark.h | 5 | ||||
-rw-r--r-- | src/main.c | 61 | ||||
-rw-r--r-- | src/man.c | 245 |
6 files changed, 304 insertions, 19 deletions
diff --git a/man/man1/cmark.1 b/man/man1/cmark.1 index 4118520..2d5af77 100644 --- a/man/man1/cmark.1 +++ b/man/man1/cmark.1 @@ -13,14 +13,15 @@ acts as a pipe, reading from \fRstdin\fR or from the specified files and writing to \fRstdout\fR. -It converts Markdown formatted plain text to HTML, using the conventions +It converts Markdown formatted plain text to HTML (or groff man or +an abstract representation of the AST), using the conventions described in the CommonMark spec. If multiple files are specified, the contents of the files are simply concatenated before parsing. .SH "OPTIONS" .TP 12n -\-\--ast -Print an abstract syntax tree instead of HTML. +\-\-to, \-t \f[I]FORMAT\f[] +Specify output format (\f[C]html\f[], \f[C]man\f[], \f[C]ast\f[]). .TP 12n \-\-help Print usage information. diff --git a/man/man3/cmark.3 b/man/man3/cmark.3 index 62d89e2..5f6d296 100644 --- a/man/man3/cmark.3 +++ b/man/man3/cmark.3 @@ -292,6 +292,10 @@ typedef enum { .PP +\fIchar *\fR \fBcmark_render_man\fR(\fIcmark_node *root\fR) + +.PP + .SH AUTHORS John MacFarlane, Vicent Marti, Kārlis Gaņģis, Nick Wellnhofer. diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index e6a578c..75a0152 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -28,6 +28,7 @@ set(LIBRARY_SOURCES utf8.c buffer.c references.c + man.c html.c html_unescape.gperf houdini_href_e.c diff --git a/src/cmark.h b/src/cmark.h index 42ffa81..d276ba9 100644 --- a/src/cmark.h +++ b/src/cmark.h @@ -346,6 +346,11 @@ char *cmark_render_ast(cmark_node *root); CMARK_EXPORT char *cmark_render_html(cmark_node *root); +/** + */ +CMARK_EXPORT +char *cmark_render_man(cmark_node *root); + /** .SH AUTHORS * * John MacFarlane, Vicent Marti, Kārlis Gaņģis, Nick Wellnhofer. @@ -7,22 +7,38 @@ #include "debug.h" #include "bench.h" +typedef enum { + FORMAT_NONE, + FORMAT_HTML, + FORMAT_MAN, + FORMAT_AST +} writer_format; + void print_usage() { printf("Usage: cmark [FILE*]\n"); - printf("Options: --help, -h Print usage information\n"); - printf(" --ast Print AST instead of HTML\n"); - printf(" --version Print version\n"); + printf("Options:\n"); + printf(" --to, -t FORMAT Specify output format (html, man, ast)\n"); + printf(" --help, -h Print usage information\n"); + printf(" --version Print version\n"); } -static void print_document(cmark_node *document, bool ast) +static void print_document(cmark_node *document, writer_format writer) { char *result; - if (ast) { + switch (writer) { + case FORMAT_AST: result = cmark_render_ast(document); - } else { - + break; + case FORMAT_HTML: result = cmark_render_html(document); + break; + case FORMAT_MAN: + result = cmark_render_man(document); + break; + default: + fprintf(stderr, "Unknown format %d\n", writer); + exit(1); } printf("%s", result); free(result); @@ -31,12 +47,12 @@ static void print_document(cmark_node *document, bool ast) int main(int argc, char *argv[]) { int i, numfps = 0; - bool ast = false; int *files; char buffer[4096]; cmark_parser *parser; size_t bytes; cmark_node *document; + writer_format writer = FORMAT_HTML; parser = cmark_parser_new(); files = (int *)malloc(argc * sizeof(*files)); @@ -50,8 +66,26 @@ int main(int argc, char *argv[]) (strcmp(argv[i], "-h") == 0)) { print_usage(); exit(0); - } else if (strcmp(argv[i], "--ast") == 0) { - ast = true; + } else if ((strcmp(argv[i], "-t") == 0) || + (strcmp(argv[i], "--to") == 0)) { + i += 1; + if (i < argc) { + if (strcmp(argv[i], "man") == 0) { + writer = FORMAT_MAN; + } else if (strcmp(argv[i], "html") == 0) { + writer = FORMAT_HTML; + } else if (strcmp(argv[i], "ast") == 0) { + writer = FORMAT_AST; + } else { + fprintf(stderr, + "Unknown format %s\n", argv[i]); + exit(1); + } + } else { + fprintf(stderr, "No argument provided for %s\n", + argv[i - 1]); + exit(1); + } } else if (*argv[i] == '-') { print_usage(); exit(1); @@ -81,11 +115,6 @@ int main(int argc, char *argv[]) } if (numfps == 0) { - /* - document = cmark_parse_file(stdin); - print_document(document, ast); - exit(0); - */ while ((bytes = fread(buffer, 1, sizeof(buffer), stdin)) > 0) { cmark_parser_feed(parser, buffer, bytes); @@ -101,7 +130,7 @@ int main(int argc, char *argv[]) cmark_parser_free(parser); start_timer(); - print_document(document, ast); + print_document(document, writer); end_timer("print_document"); start_timer(); diff --git a/src/man.c b/src/man.c new file mode 100644 index 0000000..b86c7e6 --- /dev/null +++ b/src/man.c @@ -0,0 +1,245 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <assert.h> + +#include "config.h" +#include "cmark.h" +#include "node.h" +#include "buffer.h" + +// Functions to convert cmark_nodes to groff man strings. + +static void escape_man(strbuf *dest, const unsigned char *source, int length) +{ + int i; + unsigned char c; + + for (i = 0; i < length; i++) { + c = source[i]; + if (c == '.' && i == 0) { + strbuf_puts(dest, "\\&."); + } else if (c == '\'' && i == 0) { + strbuf_puts(dest, "\\&'"); + } else if (c == '-') { + strbuf_puts(dest, "\\-"); + } else if (c == '\\') { + strbuf_puts(dest, "\\e"); + } else { + strbuf_putc(dest, source[i]); + } + } +} + +static inline void cr(strbuf *man) +{ + if (man->size && man->ptr[man->size - 1] != '\n') + strbuf_putc(man, '\n'); +} + +struct render_state { + strbuf* man; + cmark_node *plain; +}; + +static int +S_render_node(cmark_node *node, cmark_event_type ev_type, void *vstate) +{ + struct render_state *state = vstate; + cmark_node *tmp; + strbuf *man = state->man; + int list_number; + bool entering = (ev_type == CMARK_EVENT_ENTER); + + if (state->plain == node) { // back at original node + state->plain = NULL; + } + + if (state->plain != NULL) { + switch(node->type) { + case CMARK_NODE_TEXT: + case CMARK_NODE_INLINE_CODE: + case CMARK_NODE_INLINE_HTML: + escape_man(man, node->as.literal.data, + node->as.literal.len); + break; + + case CMARK_NODE_LINEBREAK: + case CMARK_NODE_SOFTBREAK: + strbuf_putc(man, ' '); + break; + + default: + break; + } + return 1; + } + + switch (node->type) { + case CMARK_NODE_BLOCK_QUOTE: + if (entering) { + cr(man); + strbuf_puts(man, ".RS"); + cr(man); + } else { + cr(man); + strbuf_puts(man, ".RE"); + cr(man); + } + break; + + case CMARK_NODE_LIST: + break; + + case CMARK_NODE_LIST_ITEM: + if (entering) { + cr(man); + strbuf_puts(man, ".IP "); + if (cmark_node_get_list_type(node->parent) == + CMARK_BULLET_LIST) { + strbuf_puts(man, "\\[bu] 2"); + } else { + list_number = cmark_node_get_list_start(node->parent); + tmp = node; + while (tmp->prev) { + tmp = tmp->prev; + list_number += 1; + } + strbuf_printf(man, "\"%d.\" 4", list_number); + } + cr(man); + } else { + cr(man); + } + break; + + case CMARK_NODE_HEADER: + if (entering) { + cr(man); + strbuf_puts(man, + cmark_node_get_header_level(node) == 1 ? + ".SH" : ".SS"); + cr(man); + } else { + cr(man); + } + break; + + case CMARK_NODE_CODE_BLOCK: + cr(man); + strbuf_puts(man, ".IP\n.nf\n\\f[C]\n"); + escape_man(man, node->string_content.ptr, + node->string_content.size); + cr(man); + strbuf_puts(man, "\\f[]\n.fi"); + cr(man); + break; + + case CMARK_NODE_HTML: + break; + + case CMARK_NODE_HRULE: + cr(man); + strbuf_puts(man, ".PP\n * * * * *"); + cr(man); + break; + + case CMARK_NODE_PARAGRAPH: + if (entering) { + // no blank line if first paragraph in list: + if (node->parent && + node->parent->type == CMARK_NODE_LIST_ITEM && + node->prev == NULL) { + // no blank line or .PP + } else { + cr(man); + strbuf_puts(man, ".PP\n"); + } + } else { + cr(man); + } + break; + + case CMARK_NODE_TEXT: + escape_man(man, node->as.literal.data, + node->as.literal.len); + break; + + case CMARK_NODE_LINEBREAK: + strbuf_puts(man, ".PD 0\n.P\n.PD"); + cr(man); + break; + + case CMARK_NODE_SOFTBREAK: + strbuf_putc(man, '\n'); + break; + + case CMARK_NODE_INLINE_CODE: + strbuf_puts(man, "\\f[C]"); + escape_man(man, node->as.literal.data, node->as.literal.len); + strbuf_puts(man, "\\f[]"); + break; + + case CMARK_NODE_INLINE_HTML: + break; + + case CMARK_NODE_STRONG: + if (entering) { + strbuf_puts(man, "\\f[B]"); + } else { + strbuf_puts(man, "\\f[]"); + } + break; + + case CMARK_NODE_EMPH: + if (entering) { + strbuf_puts(man, "\\f[I]"); + } else { + strbuf_puts(man, "\\f[]"); + } + break; + + case CMARK_NODE_LINK: + if (!entering) { + strbuf_printf(man, " (%s)", + cmark_node_get_url(node)); + } + break; + + case CMARK_NODE_IMAGE: + if (entering) { + strbuf_puts(man, "[IMAGE: "); + state->plain = node; + } else { + strbuf_puts(man, "]"); + } + break; + + default: + assert(false); + break; + } + + // strbuf_putc(man, 'x'); + return 1; +} + +char *cmark_render_man(cmark_node *root) +{ + char *result; + strbuf man = GH_BUF_INIT; + struct render_state state = { &man, NULL }; + cmark_node *cur; + cmark_event_type ev_type; + cmark_iter *iter = cmark_iter_new(root); + + while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { + cur = cmark_iter_get_node(iter); + S_render_node(cur, ev_type, &state); + } + result = (char *)strbuf_detach(&man); + + cmark_iter_free(iter); + strbuf_free(&man); + return result; +} |