From 59cc3c9323dc0b7aa1fd5817e12884ef925461d4 Mon Sep 17 00:00:00 2001
From: John MacFarlane
Date: Fri, 12 Dec 2014 23:29:35 -0800
Subject: Added cmark_render_man (man page writer).

cmark: Replaced `--man` and `--ast` with generic `--to` option.
---
 src/CMakeLists.txt |   1 +
 src/cmark.h        |   5 ++
 src/main.c         |  61 +++++++++----
 src/man.c          | 245 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 296 insertions(+), 16 deletions(-)
 create mode 100644 src/man.c

(limited to 'src')

diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index e6a578c..75a0152 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -28,6 +28,7 @@ set(LIBRARY_SOURCES
   utf8.c
   buffer.c
   references.c
+  man.c
   html.c
   html_unescape.gperf
   houdini_href_e.c
diff --git a/src/cmark.h b/src/cmark.h
index 42ffa81..d276ba9 100644
--- a/src/cmark.h
+++ b/src/cmark.h
@@ -346,6 +346,11 @@ char *cmark_render_ast(cmark_node *root);
 CMARK_EXPORT
 char *cmark_render_html(cmark_node *root);
 
+/**
+ */
+CMARK_EXPORT
+char *cmark_render_man(cmark_node *root);
+
 /** .SH AUTHORS
  *
  * John MacFarlane, Vicent Marti, Kārlis Gaņģis, Nick Wellnhofer.
diff --git a/src/main.c b/src/main.c
index 6134b13..be3d305 100644
--- a/src/main.c
+++ b/src/main.c
@@ -7,22 +7,38 @@
 #include "debug.h"
 #include "bench.h"
 
+typedef enum {
+	FORMAT_NONE,
+	FORMAT_HTML,
+	FORMAT_MAN,
+	FORMAT_AST
+} writer_format;
+
 void print_usage()
 {
 	printf("Usage: cmark [FILE*]\n");
-	printf("Options: --help, -h Print usage information\n");
-	printf(" --ast Print AST instead of HTML\n");
-	printf(" --version Print version\n");
+	printf("Options:\n");
+	printf(" --to, -t FORMAT Specify output format (html, man, ast)\n");
+	printf(" --help, -h Print usage information\n");
+	printf(" --version Print version\n");
 }
 
-static void print_document(cmark_node *document, bool ast)
+static void print_document(cmark_node *document, writer_format writer)
 {
 	char *result;
-	if (ast) {
+	switch (writer) {
+	case FORMAT_AST:
 		result = cmark_render_ast(document);
-	} else {
-
+		break;
+	case FORMAT_HTML:
 		result = cmark_render_html(document);
+		break;
+	case FORMAT_MAN:
+		result = cmark_render_man(document);
+		break;
+	default:
+		fprintf(stderr, "Unknown format %d\n", writer);
+		exit(1);
 	}
 	printf("%s", result);
 	free(result);
@@ -31,12 +47,12 @@ static void print_document(cmark_node *document, bool ast)
 int main(int argc, char *argv[])
 {
 	int i, numfps = 0;
-	bool ast = false;
 	int *files;
 	char buffer[4096];
 	cmark_parser *parser;
 	size_t bytes;
 	cmark_node *document;
+	writer_format writer = FORMAT_HTML;
 
 	parser = cmark_parser_new();
 	files = (int *)malloc(argc * sizeof(*files));
@@ -50,8 +66,26 @@ int main(int argc, char *argv[])
 			   (strcmp(argv[i], "-h") == 0)) {
 			print_usage();
 			exit(0);
-		} else if (strcmp(argv[i], "--ast") == 0) {
-			ast = true;
+		} else if ((strcmp(argv[i], "-t") == 0) ||
+			   (strcmp(argv[i], "--to") == 0)) {
+			i += 1;
+			if (i < argc) {
+				if (strcmp(argv[i], "man") == 0) {
+					writer = FORMAT_MAN;
+				} else if (strcmp(argv[i], "html") == 0) {
+					writer = FORMAT_HTML;
+				} else if (strcmp(argv[i], "ast") == 0) {
+					writer = FORMAT_AST;
+				} else {
+					fprintf(stderr,
+						"Unknown format %s\n", argv[i]);
+					exit(1);
+				}
+			} else {
+				fprintf(stderr, "No argument provided for %s\n",
+					argv[i - 1]);
+				exit(1);
+			}
 		} else if (*argv[i] == '-') {
 			print_usage();
 			exit(1);
@@ -81,11 +115,6 @@ int main(int argc, char *argv[])
 	}
 
 	if (numfps == 0) {
-		/*
-		document = cmark_parse_file(stdin);
-		print_document(document, ast);
-		exit(0);
-		*/
 
 		while ((bytes = fread(buffer, 1, sizeof(buffer), stdin)) > 0) {
 			cmark_parser_feed(parser, buffer, bytes);
@@ -101,7 +130,7 @@ int main(int argc, char *argv[])
 	cmark_parser_free(parser);
 
 	start_timer();
-	print_document(document, ast);
+	print_document(document, writer);
 	end_timer("print_document");
 
 	start_timer();
diff --git a/src/man.c b/src/man.c
new file mode 100644
index 0000000..b86c7e6
--- /dev/null
+++ b/src/man.c
@@ -0,0 +1,245 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "config.h"
+#include "cmark.h"
+#include "node.h"
+#include "buffer.h"
+
+// Functions to convert cmark_nodes to groff man strings.
+
+static void escape_man(strbuf *dest, const unsigned char *source, int length)
+{
+	int i;
+	unsigned char c;
+
+	for (i = 0; i < length; i++) {
+		c = source[i];
+		if (c == '.' && i == 0) {
+			strbuf_puts(dest, "\\&.");
+		} else if (c == '\'' && i == 0) {
+			strbuf_puts(dest, "\\&'");
+		} else if (c == '-') {
+			strbuf_puts(dest, "\\-");
+		} else if (c == '\\') {
+			strbuf_puts(dest, "\\e");
+		} else {
+			strbuf_putc(dest, source[i]);
+		}
+	}
+}
+
+static inline void cr(strbuf *man)
+{
+	if (man->size && man->ptr[man->size - 1] != '\n')
+		strbuf_putc(man, '\n');
+}
+
+struct render_state {
+	strbuf* man;
+	cmark_node *plain;
+};
+
+static int
+S_render_node(cmark_node *node, cmark_event_type ev_type, void *vstate)
+{
+	struct render_state *state = vstate;
+	cmark_node *tmp;
+	strbuf *man = state->man;
+	int list_number;
+	bool entering = (ev_type == CMARK_EVENT_ENTER);
+
+	if (state->plain == node) { // back at original node
+		state->plain = NULL;
+	}
+
+	if (state->plain != NULL) {
+		switch(node->type) {
+		case CMARK_NODE_TEXT:
+		case CMARK_NODE_INLINE_CODE:
+		case CMARK_NODE_INLINE_HTML:
+			escape_man(man, node->as.literal.data,
+				   node->as.literal.len);
+			break;
+
+		case CMARK_NODE_LINEBREAK:
+		case CMARK_NODE_SOFTBREAK:
+			strbuf_putc(man, ' ');
+			break;
+
+		default:
+			break;
+		}
+		return 1;
+	}
+
+	switch (node->type) {
+	case CMARK_NODE_BLOCK_QUOTE:
+		if (entering) {
+			cr(man);
+			strbuf_puts(man, ".RS");
+			cr(man);
+		} else {
+			cr(man);
+			strbuf_puts(man, ".RE");
+			cr(man);
+		}
+		break;
+
+	case CMARK_NODE_LIST:
+		break;
+
+	case CMARK_NODE_LIST_ITEM:
+		if (entering) {
+			cr(man);
+			strbuf_puts(man, ".IP ");
+			if (cmark_node_get_list_type(node->parent) ==
+			    CMARK_BULLET_LIST) {
+				strbuf_puts(man, "\\[bu] 2");
+			} else {
+				list_number = cmark_node_get_list_start(node->parent);
+				tmp = node;
+				while (tmp->prev) {
+					tmp = tmp->prev;
+					list_number += 1;
+				}
+				strbuf_printf(man, "\"%d.\" 4", list_number);
+			}
+			cr(man);
+		} else {
+			cr(man);
+		}
+		break;
+
+	case CMARK_NODE_HEADER:
+		if (entering) {
+			cr(man);
+			strbuf_puts(man,
+				    cmark_node_get_header_level(node) == 1 ?
+				    ".SH" : ".SS");
+			cr(man);
+		} else {
+			cr(man);
+		}
+		break;
+
+	case CMARK_NODE_CODE_BLOCK:
+		cr(man);
+		strbuf_puts(man, ".IP\n.nf\n\\f[C]\n");
+		escape_man(man, node->string_content.ptr,
+			   node->string_content.size);
+		cr(man);
+		strbuf_puts(man, "\\f[]\n.fi");
+		cr(man);
+		break;
+
+	case CMARK_NODE_HTML:
+		break;
+
+	case CMARK_NODE_HRULE:
+		cr(man);
+		strbuf_puts(man, ".PP\n * * * * *");
+		cr(man);
+		break;
+
+	case CMARK_NODE_PARAGRAPH:
+		if (entering) {
+			// no blank line if first paragraph in list:
+			if (node->parent &&
+			    node->parent->type == CMARK_NODE_LIST_ITEM &&
+			    node->prev == NULL) {
+				// no blank line or .PP
+			} else {
+				cr(man);
+				strbuf_puts(man, ".PP\n");
+			}
+		} else {
+			cr(man);
+		}
+		break;
+
+	case CMARK_NODE_TEXT:
+		escape_man(man, node->as.literal.data,
+			   node->as.literal.len);
+		break;
+
+	case CMARK_NODE_LINEBREAK:
+		strbuf_puts(man, ".PD 0\n.P\n.PD");
+		cr(man);
+		break;
+
+	case CMARK_NODE_SOFTBREAK:
+		strbuf_putc(man, '\n');
+		break;
+
+	case CMARK_NODE_INLINE_CODE:
+		strbuf_puts(man, "\\f[C]");
+		escape_man(man, node->as.literal.data, node->as.literal.len);
+		strbuf_puts(man, "\\f[]");
+		break;
+
+	case CMARK_NODE_INLINE_HTML:
+		break;
+
+	case CMARK_NODE_STRONG:
+		if (entering) {
+			strbuf_puts(man, "\\f[B]");
+		} else {
+			strbuf_puts(man, "\\f[]");
+		}
+		break;
+
+	case CMARK_NODE_EMPH:
+		if (entering) {
+			strbuf_puts(man, "\\f[I]");
+		} else {
+			strbuf_puts(man, "\\f[]");
+		}
+		break;
+
+	case CMARK_NODE_LINK:
+		if (!entering) {
+			strbuf_printf(man, " (%s)",
+				      cmark_node_get_url(node));
+		}
+		break;
+
+	case CMARK_NODE_IMAGE:
+		if (entering) {
+			strbuf_puts(man, "[IMAGE: ");
+			state->plain = node;
+		} else {
+			strbuf_puts(man, "]");
+		}
+		break;
+
+	default:
+		assert(false);
+		break;
+	}
+
+	// strbuf_putc(man, 'x');
+	return 1;
+}
+
+char *cmark_render_man(cmark_node *root)
+{
+	char *result;
+	strbuf man = GH_BUF_INIT;
+	struct render_state state = { &man, NULL };
+	cmark_node *cur;
+	cmark_event_type ev_type;
+	cmark_iter *iter = cmark_iter_new(root);
+
+	while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
+		cur = cmark_iter_get_node(iter);
+		S_render_node(cur, ev_type, &state);
+	}
+	result = (char *)strbuf_detach(&man);
+
+	cmark_iter_free(iter);
+	strbuf_free(&man);
+	return result;
+}
-- 
cgit v1.2.3