aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author John MacFarlane <jgm@berkeley.edu> 2014-12-12 23:29:35 -0800
committer John MacFarlane <jgm@berkeley.edu> 2014-12-13 20:19:41 -0800
commit 59cc3c9323dc0b7aa1fd5817e12884ef925461d4 (patch)
tree aae78b1d3ed29f4b5f3a831d9e8c09b0d5d9580f
parent a3030f985a973b3b835645313fdad1a8a72ff432 (diff)
Added cmark_render_man (man page writer).
cmark: Replaced `--man` and `--ast` with generic `--to` option.
-rw-r--r-- man/man1/cmark.1 7
-rw-r--r-- man/man3/cmark.3 4
-rw-r--r-- src/CMakeLists.txt 1
-rw-r--r-- src/cmark.h 5
-rw-r--r-- src/main.c 61
-rw-r--r-- src/man.c 245
6 files changed, 304 insertions, 19 deletions
diff --git a/man/man1/cmark.1 b/man/man1/cmark.1
index 4118520..2d5af77 100644
--- a/man/man1/cmark.1
+++ b/man/man1/cmark.1
@@ -13,14 +13,15 @@ acts as a pipe, reading from
\fRstdin\fR
or from the specified files and writing to
\fRstdout\fR.
-It converts Markdown formatted plain text to HTML, using the conventions
+It converts Markdown formatted plain text to HTML (or groff man or
+an abstract representation of the AST), using the conventions
described in the CommonMark spec.
If multiple files are specified, the contents of the files are simply
concatenated before parsing.
.SH "OPTIONS"
.TP 12n
-\-\--ast
-Print an abstract syntax tree instead of HTML.
+\-\-to, \-t \f[I]FORMAT\f[]
+Specify output format (\f[C]html\f[], \f[C]man\f[], \f[C]ast\f[]).
.TP 12n
\-\-help
Print usage information.
diff --git a/man/man3/cmark.3 b/man/man3/cmark.3
index 62d89e2..5f6d296 100644
--- a/man/man3/cmark.3
+++ b/man/man3/cmark.3
@@ -292,6 +292,10 @@ typedef enum {
.PP
+\fIchar *\fR \fBcmark_render_man\fR(\fIcmark_node *root\fR)
+
+.PP
+
.SH AUTHORS
John MacFarlane, Vicent Marti, Kārlis Gaņģis, Nick Wellnhofer.
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index e6a578c..75a0152 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -28,6 +28,7 @@ set(LIBRARY_SOURCES
utf8.c
buffer.c
references.c
+ man.c
html.c
html_unescape.gperf
houdini_href_e.c
diff --git a/src/cmark.h b/src/cmark.h
index 42ffa81..d276ba9 100644
--- a/src/cmark.h
+++ b/src/cmark.h
@@ -346,6 +346,11 @@ char *cmark_render_ast(cmark_node *root);
CMARK_EXPORT
char *cmark_render_html(cmark_node *root);
+/** Renders the tree under 'root' as groff man source and returns the
+ * result as a string.  The cmark program releases the returned string
+ * with free() once it has been printed.
+ */
+CMARK_EXPORT
+char *cmark_render_man(cmark_node *root);
+
/** .SH AUTHORS
*
* John MacFarlane, Vicent Marti, Kārlis Gaņģis, Nick Wellnhofer.
diff --git a/src/main.c b/src/main.c
index 6134b13..be3d305 100644
--- a/src/main.c
+++ b/src/main.c
@@ -7,22 +7,38 @@
#include "debug.h"
#include "bench.h"
+// Output formats that print_document() can dispatch on; chosen with the
+// --to / -t command-line option.
+typedef enum {
+ // Not assigned anywhere in the visible code; print_document() would
+ // report it as an unknown format.
+ FORMAT_NONE,
+ // Rendered with cmark_render_html(); the initial value in main().
+ FORMAT_HTML,
+ // Rendered with cmark_render_man().
+ FORMAT_MAN,
+ // Rendered with cmark_render_ast().
+ FORMAT_AST
+} writer_format;
+
void print_usage()
{
printf("Usage: cmark [FILE*]\n");
- printf("Options: --help, -h Print usage information\n");
- printf(" --ast Print AST instead of HTML\n");
- printf(" --version Print version\n");
+ printf("Options:\n");
+ printf(" --to, -t FORMAT Specify output format (html, man, ast)\n");
+ printf(" --help, -h Print usage information\n");
+ printf(" --version Print version\n");
}
-static void print_document(cmark_node *document, bool ast)
+static void print_document(cmark_node *document, writer_format writer)
{
char *result;
- if (ast) {
+ switch (writer) {
+ case FORMAT_AST:
result = cmark_render_ast(document);
- } else {
-
+ break;
+ case FORMAT_HTML:
result = cmark_render_html(document);
+ break;
+ case FORMAT_MAN:
+ result = cmark_render_man(document);
+ break;
+ default:
+ fprintf(stderr, "Unknown format %d\n", writer);
+ exit(1);
}
printf("%s", result);
free(result);
@@ -31,12 +47,12 @@ static void print_document(cmark_node *document, bool ast)
int main(int argc, char *argv[])
{
int i, numfps = 0;
- bool ast = false;
int *files;
char buffer[4096];
cmark_parser *parser;
size_t bytes;
cmark_node *document;
+ writer_format writer = FORMAT_HTML;
parser = cmark_parser_new();
files = (int *)malloc(argc * sizeof(*files));
@@ -50,8 +66,26 @@ int main(int argc, char *argv[])
(strcmp(argv[i], "-h") == 0)) {
print_usage();
exit(0);
- } else if (strcmp(argv[i], "--ast") == 0) {
- ast = true;
+ } else if ((strcmp(argv[i], "-t") == 0) ||
+ (strcmp(argv[i], "--to") == 0)) {
+ i += 1;
+ if (i < argc) {
+ if (strcmp(argv[i], "man") == 0) {
+ writer = FORMAT_MAN;
+ } else if (strcmp(argv[i], "html") == 0) {
+ writer = FORMAT_HTML;
+ } else if (strcmp(argv[i], "ast") == 0) {
+ writer = FORMAT_AST;
+ } else {
+ fprintf(stderr,
+ "Unknown format %s\n", argv[i]);
+ exit(1);
+ }
+ } else {
+ fprintf(stderr, "No argument provided for %s\n",
+ argv[i - 1]);
+ exit(1);
+ }
} else if (*argv[i] == '-') {
print_usage();
exit(1);
@@ -81,11 +115,6 @@ int main(int argc, char *argv[])
}
if (numfps == 0) {
- /*
- document = cmark_parse_file(stdin);
- print_document(document, ast);
- exit(0);
- */
while ((bytes = fread(buffer, 1, sizeof(buffer), stdin)) > 0) {
cmark_parser_feed(parser, buffer, bytes);
@@ -101,7 +130,7 @@ int main(int argc, char *argv[])
cmark_parser_free(parser);
start_timer();
- print_document(document, ast);
+ print_document(document, writer);
end_timer("print_document");
start_timer();
diff --git a/src/man.c b/src/man.c
new file mode 100644
index 0000000..b86c7e6
--- /dev/null
+++ b/src/man.c
@@ -0,0 +1,245 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+#include "config.h"
+#include "cmark.h"
+#include "node.h"
+#include "buffer.h"
+
+// Functions to convert cmark_nodes to groff man strings.
+
+// Append `length` bytes of `source` to `dest`, escaping the characters
+// that groff would otherwise treat specially:
+//   - '.' or '\'' at the start of a line is prefixed with "\&" so that it
+//     is not read as a control character (i.e. the start of a request);
+//   - '-' is written as "\-";
+//   - '\' is written as "\e".
+// Every other byte is copied unchanged.
+//
+// A byte is considered to be at the start of a line when it is the first
+// byte of `source` or when it directly follows a '\n' inside `source`.
+// The second condition matters for multi-line input such as code block
+// contents, where any line -- not only the first -- may begin with '.'.
+static void escape_man(strbuf *dest, const unsigned char *source, int length)
+{
+    int i;
+    unsigned char c;
+    bool line_start = true;
+
+    for (i = 0; i < length; i++) {
+        c = source[i];
+        if (c == '.' && line_start) {
+            strbuf_puts(dest, "\\&.");
+        } else if (c == '\'' && line_start) {
+            strbuf_puts(dest, "\\&'");
+        } else if (c == '-') {
+            strbuf_puts(dest, "\\-");
+        } else if (c == '\\') {
+            strbuf_puts(dest, "\\e");
+        } else {
+            strbuf_putc(dest, c);
+        }
+        // The next byte starts a line only if this one ended the
+        // current line.
+        line_start = (c == '\n');
+    }
+}
+
+// Make sure the output ends with a newline: append '\n' unless the buffer
+// is empty or its last byte already is one.
+static inline void cr(strbuf *man)
+{
+    if (!man->size) {
+        return;
+    }
+    if (man->ptr[man->size - 1] == '\n') {
+        return;
+    }
+    strbuf_putc(man, '\n');
+}
+
+// State shared by every S_render_node() call during one rendering pass.
+struct render_state {
+ // Buffer that receives the generated man source.
+ strbuf* man;
+ // Image node whose contents are currently being emitted as plain escaped
+ // text; NULL while normal rendering is in effect.  Set when an image is
+ // entered and cleared when the iterator comes back to that same node.
+ cmark_node *plain;
+};
+
+// Render one iterator event for `node` into the man output buffer.
+//
+//   node     the node the iterator is currently visiting
+//   ev_type  CMARK_EVENT_ENTER when the node is being entered; any other
+//            value is treated as leaving the node
+//   vstate   pointer to the struct render_state for this rendering pass
+//
+// While state->plain is set (i.e. inside an image), only literal text is
+// emitted: text, inline code and inline HTML literals are escaped and
+// appended, line breaks become a single space, and every other node is
+// ignored.  Plain mode ends when the iterator returns to the image node.
+//
+// Always returns 1.
+static int
+S_render_node(cmark_node *node, cmark_event_type ev_type, void *vstate)
+{
+    struct render_state *state = vstate;
+    cmark_node *tmp;
+    strbuf *man = state->man;
+    int list_number;
+    const char *url;
+    bool entering = (ev_type == CMARK_EVENT_ENTER);
+
+    if (state->plain == node) { // back at original node
+        state->plain = NULL;
+    }
+
+    if (state->plain != NULL) {
+        switch(node->type) {
+        case CMARK_NODE_TEXT:
+        case CMARK_NODE_INLINE_CODE:
+        case CMARK_NODE_INLINE_HTML:
+            escape_man(man, node->as.literal.data,
+                       node->as.literal.len);
+            break;
+
+        case CMARK_NODE_LINEBREAK:
+        case CMARK_NODE_SOFTBREAK:
+            strbuf_putc(man, ' ');
+            break;
+
+        default:
+            break;
+        }
+        return 1;
+    }
+
+    switch (node->type) {
+    case CMARK_NODE_DOCUMENT:
+        // The root produces no output of its own.  It needs an explicit
+        // case so that it does not reach the assert in `default`.
+        break;
+
+    case CMARK_NODE_BLOCK_QUOTE:
+        if (entering) {
+            cr(man);
+            strbuf_puts(man, ".RS");
+            cr(man);
+        } else {
+            cr(man);
+            strbuf_puts(man, ".RE");
+            cr(man);
+        }
+        break;
+
+    case CMARK_NODE_LIST:
+        break;
+
+    case CMARK_NODE_LIST_ITEM:
+        if (entering) {
+            cr(man);
+            strbuf_puts(man, ".IP ");
+            if (cmark_node_get_list_type(node->parent) ==
+                CMARK_BULLET_LIST) {
+                strbuf_puts(man, "\\[bu] 2");
+            } else {
+                // Item number = list start + number of preceding
+                // siblings.
+                list_number = cmark_node_get_list_start(node->parent);
+                tmp = node;
+                while (tmp->prev) {
+                    tmp = tmp->prev;
+                    list_number += 1;
+                }
+                strbuf_printf(man, "\"%d.\" 4", list_number);
+            }
+            cr(man);
+        } else {
+            cr(man);
+        }
+        break;
+
+    case CMARK_NODE_HEADER:
+        if (entering) {
+            cr(man);
+            // Level 1 headers become sections, all others subsections.
+            strbuf_puts(man,
+                        cmark_node_get_header_level(node) == 1 ?
+                        ".SH" : ".SS");
+            cr(man);
+        } else {
+            cr(man);
+        }
+        break;
+
+    case CMARK_NODE_CODE_BLOCK:
+        cr(man);
+        strbuf_puts(man, ".IP\n.nf\n\\f[C]\n");
+        escape_man(man, node->string_content.ptr,
+                   node->string_content.size);
+        cr(man);
+        strbuf_puts(man, "\\f[]\n.fi");
+        cr(man);
+        break;
+
+    case CMARK_NODE_HTML:
+        break;
+
+    case CMARK_NODE_HRULE:
+        cr(man);
+        strbuf_puts(man, ".PP\n  *  *  *  *  *");
+        cr(man);
+        break;
+
+    case CMARK_NODE_PARAGRAPH:
+        if (entering) {
+            // no blank line if first paragraph in list:
+            if (node->parent &&
+                node->parent->type == CMARK_NODE_LIST_ITEM &&
+                node->prev == NULL) {
+                // no blank line or .PP
+            } else {
+                cr(man);
+                strbuf_puts(man, ".PP\n");
+            }
+        } else {
+            cr(man);
+        }
+        break;
+
+    case CMARK_NODE_TEXT:
+        escape_man(man, node->as.literal.data,
+                   node->as.literal.len);
+        break;
+
+    case CMARK_NODE_LINEBREAK:
+        strbuf_puts(man, ".PD 0\n.P\n.PD");
+        cr(man);
+        break;
+
+    case CMARK_NODE_SOFTBREAK:
+        strbuf_putc(man, '\n');
+        break;
+
+    case CMARK_NODE_INLINE_CODE:
+        strbuf_puts(man, "\\f[C]");
+        escape_man(man, node->as.literal.data, node->as.literal.len);
+        strbuf_puts(man, "\\f[]");
+        break;
+
+    case CMARK_NODE_INLINE_HTML:
+        break;
+
+    case CMARK_NODE_STRONG:
+        if (entering) {
+            strbuf_puts(man, "\\f[B]");
+        } else {
+            strbuf_puts(man, "\\f[]");
+        }
+        break;
+
+    case CMARK_NODE_EMPH:
+        if (entering) {
+            strbuf_puts(man, "\\f[I]");
+        } else {
+            strbuf_puts(man, "\\f[]");
+        }
+        break;
+
+    case CMARK_NODE_LINK:
+        if (!entering) {
+            // The URL goes through escape_man like any other text so
+            // that '-' and '\' in it are not interpreted by groff.  A
+            // missing URL yields an empty pair of parentheses rather
+            // than handing NULL to a %s conversion.
+            url = cmark_node_get_url(node);
+            strbuf_puts(man, " (");
+            if (url != NULL) {
+                escape_man(man, (const unsigned char *)url,
+                           (int)strlen(url));
+            }
+            strbuf_putc(man, ')');
+        }
+        break;
+
+    case CMARK_NODE_IMAGE:
+        if (entering) {
+            strbuf_puts(man, "[IMAGE: ");
+            // Emit the image's contents as plain text until the
+            // iterator returns to this node.
+            state->plain = node;
+        } else {
+            strbuf_puts(man, "]");
+        }
+        break;
+
+    default:
+        assert(false);
+        break;
+    }
+
+    return 1;
+}
+
+// Walk the tree rooted at `root` with an iterator, feed every event to
+// S_render_node(), and return the accumulated man source as a string
+// detached from the internal buffer.
+char *cmark_render_man(cmark_node *root)
+{
+    strbuf man = GH_BUF_INIT;
+    struct render_state state = { &man, NULL };
+    cmark_iter *iter = cmark_iter_new(root);
+    cmark_event_type ev_type;
+    char *result;
+
+    for (;;) {
+        ev_type = cmark_iter_next(iter);
+        if (ev_type == CMARK_EVENT_DONE) {
+            break;
+        }
+        S_render_node(cmark_iter_get_node(iter), ev_type, &state);
+    }
+    cmark_iter_free(iter);
+
+    result = (char *)strbuf_detach(&man);
+    strbuf_free(&man);
+    return result;
+}