aboutsummaryrefslogtreecommitdiff
path: root/src/html/html.c
blob: 889941c35e9321d7b6802976f575e8279e1bf16f (plain)
  1. #include <stdlib.h>
  2. #include <stdio.h>
  3. #include <string.h>
  4. #include <assert.h>
  5. #include "config.h"
  6. #include "cmark.h"
  7. #include "node.h"
  8. #include "buffer.h"
  9. #include "html/houdini.h"
  10. // Functions to convert cmark_nodes to HTML strings.
  11. static bool
  12. finish_node(strbuf *html, cmark_node *node, bool tight);
  13. static void escape_html(strbuf *dest, const unsigned char *source, int length)
  14. {
  15. if (length < 0)
  16. length = strlen((char *)source);
  17. houdini_escape_html0(dest, source, (size_t)length, 0);
  18. }
  19. static void escape_href(strbuf *dest, const unsigned char *source, int length)
  20. {
  21. if (length < 0)
  22. length = strlen((char *)source);
  23. houdini_escape_href(dest, source, (size_t)length);
  24. }
  25. static inline void cr(strbuf *html)
  26. {
  27. if (html->size && html->ptr[html->size - 1] != '\n')
  28. strbuf_putc(html, '\n');
  29. }
  30. // Convert the inline children of a node to a plain string.
  31. static void inlines_to_plain_html(strbuf *html, cmark_node* node)
  32. {
  33. cmark_node* cur = node->first_child;
  34. if (cur == NULL) {
  35. return;
  36. }
  37. while (true) {
  38. switch(cur->type) {
  39. case NODE_STRING:
  40. case NODE_INLINE_CODE:
  41. case NODE_INLINE_HTML:
  42. escape_html(html, cur->as.literal.data, cur->as.literal.len);
  43. break;
  44. case NODE_LINEBREAK:
  45. case NODE_SOFTBREAK:
  46. strbuf_putc(html, ' ');
  47. break;
  48. default:
  49. break;
  50. }
  51. if (cur->first_child) {
  52. cur = cur->first_child;
  53. continue;
  54. }
  55. next_sibling:
  56. if (cur->next) {
  57. cur = cur->next;
  58. continue;
  59. }
  60. cur = cur->parent;
  61. if (cur == node) {
  62. break;
  63. }
  64. goto next_sibling;
  65. }
  66. }
  67. // Convert a cmark_node to HTML.
  68. static void node_to_html(strbuf *html, cmark_node *node)
  69. {
  70. cmark_node *cur;
  71. char start_header[] = "<h0>";
  72. bool tight = false;
  73. bool visit_children;
  74. if (node == NULL) {
  75. return;
  76. }
  77. cur = node;
  78. while (true) {
  79. // Only NODE_IMAGE wants to skip its children.
  80. visit_children = true;
  81. switch(cur->type) {
  82. case NODE_DOCUMENT:
  83. break;
  84. case NODE_PARAGRAPH:
  85. if (!tight) {
  86. cr(html);
  87. strbuf_puts(html, "<p>");
  88. }
  89. break;
  90. case NODE_BQUOTE:
  91. cr(html);
  92. strbuf_puts(html, "<blockquote>\n");
  93. // BQUOTE doesn't use any of the 'as' structs,
  94. // so the 'list' member can be used to store the
  95. // current value of 'tight'.
  96. cur->as.list.tight = tight;
  97. tight = false;
  98. break;
  99. case NODE_LIST_ITEM:
  100. cr(html);
  101. strbuf_puts(html, "<li>");
  102. break;
  103. case NODE_LIST: {
  104. cmark_list *list = &cur->as.list;
  105. bool tmp;
  106. // make sure a list starts at the beginning of the line:
  107. cr(html);
  108. if (list->list_type == CMARK_BULLET_LIST) {
  109. strbuf_puts(html, "<ul>\n");
  110. }
  111. else if (list->start == 1) {
  112. strbuf_puts(html, "<ol>\n");
  113. }
  114. else {
  115. strbuf_printf(html, "<ol start=\"%d\">\n",
  116. list->start);
  117. }
  118. // Store the current value of 'tight' by swapping.
  119. tmp = list->tight;
  120. list->tight = tight;
  121. tight = tmp;
  122. break;
  123. }
  124. case NODE_ATX_HEADER:
  125. case NODE_SETEXT_HEADER:
  126. cr(html);
  127. start_header[2] = '0' + cur->as.header.level;
  128. strbuf_puts(html, start_header);
  129. break;
  130. case NODE_INDENTED_CODE:
  131. case NODE_FENCED_CODE: {
  132. strbuf *info = &cur->as.code.info;
  133. cr(html);
  134. if (cur->type != NODE_FENCED_CODE
  135. || strbuf_len(info) == 0) {
  136. strbuf_puts(html, "<pre><code>");
  137. }
  138. else {
  139. int first_tag = strbuf_strchr(info, ' ', 0);
  140. if (first_tag < 0)
  141. first_tag = strbuf_len(info);
  142. strbuf_puts(html,
  143. "<pre><code class=\"language-");
  144. escape_html(html, info->ptr, first_tag);
  145. strbuf_puts(html, "\">");
  146. }
  147. escape_html(html, cur->string_content.ptr, cur->string_content.size);
  148. break;
  149. }
  150. case NODE_HTML:
  151. cr(html);
  152. strbuf_put(html, cur->string_content.ptr, cur->string_content.size);
  153. break;
  154. case NODE_HRULE:
  155. cr(html);
  156. strbuf_puts(html, "<hr />\n");
  157. break;
  158. case NODE_REFERENCE_DEF:
  159. break;
  160. case NODE_STRING:
  161. escape_html(html, cur->as.literal.data, cur->as.literal.len);
  162. break;
  163. case NODE_LINEBREAK:
  164. strbuf_puts(html, "<br />\n");
  165. break;
  166. case NODE_SOFTBREAK:
  167. strbuf_putc(html, '\n');
  168. break;
  169. case NODE_INLINE_CODE:
  170. strbuf_puts(html, "<code>");
  171. escape_html(html, cur->as.literal.data, cur->as.literal.len);
  172. break;
  173. case NODE_INLINE_HTML:
  174. strbuf_put(html,
  175. cur->as.literal.data,
  176. cur->as.literal.len);
  177. break;
  178. case NODE_LINK:
  179. strbuf_puts(html, "<a href=\"");
  180. if (cur->as.link.url)
  181. escape_href(html, cur->as.link.url, -1);
  182. if (cur->as.link.title) {
  183. strbuf_puts(html, "\" title=\"");
  184. escape_html(html, cur->as.link.title, -1);
  185. }
  186. strbuf_puts(html, "\">");
  187. break;
  188. case NODE_IMAGE:
  189. strbuf_puts(html, "<img src=\"");
  190. if (cur->as.link.url)
  191. escape_href(html, cur->as.link.url, -1);
  192. strbuf_puts(html, "\" alt=\"");
  193. inlines_to_plain_html(html, cur);
  194. if (cur->as.link.title) {
  195. strbuf_puts(html, "\" title=\"");
  196. escape_html(html, cur->as.link.title, -1);
  197. }
  198. strbuf_puts(html, "\" />");
  199. visit_children = false;
  200. break;
  201. case NODE_STRONG:
  202. strbuf_puts(html, "<strong>");
  203. break;
  204. case NODE_EMPH:
  205. strbuf_puts(html, "<em>");
  206. break;
  207. default:
  208. assert(false);
  209. }
  210. if (visit_children && cur->first_child) {
  211. cur = cur->first_child;
  212. continue;
  213. }
  214. next_sibling:
  215. tight = finish_node(html, cur, tight);
  216. if (cur == node) {
  217. break;
  218. }
  219. if (cur->next) {
  220. cur = cur->next;
  221. continue;
  222. }
  223. cur = cur->parent;
  224. goto next_sibling;
  225. }
  226. }
  227. // Returns the restored value of 'tight'.
  228. static bool
  229. finish_node(strbuf *html, cmark_node *node, bool tight)
  230. {
  231. char end_header[] = "</h0>\n";
  232. switch (node->type) {
  233. case NODE_PARAGRAPH:
  234. if (!tight) {
  235. strbuf_puts(html, "</p>\n");
  236. }
  237. break;
  238. case NODE_BQUOTE: {
  239. cmark_list *list = &node->as.list;
  240. strbuf_puts(html, "</blockquote>\n");
  241. // Restore old 'tight' value.
  242. tight = list->tight;
  243. list->tight = false;
  244. break;
  245. }
  246. case NODE_LIST_ITEM:
  247. strbuf_puts(html, "</li>\n");
  248. break;
  249. case NODE_LIST: {
  250. cmark_list *list = &node->as.list;
  251. bool tmp;
  252. strbuf_puts(html,
  253. list->list_type == CMARK_BULLET_LIST ?
  254. "</ul>\n" : "</ol>\n");
  255. // Restore old 'tight' value.
  256. tmp = tight;
  257. tight = list->tight;
  258. list->tight = tmp;
  259. break;
  260. }
  261. case NODE_ATX_HEADER:
  262. case NODE_SETEXT_HEADER:
  263. end_header[3] = '0' + node->as.header.level;
  264. strbuf_puts(html, end_header);
  265. break;
  266. case NODE_INDENTED_CODE:
  267. case NODE_FENCED_CODE:
  268. strbuf_puts(html, "</code></pre>\n");
  269. break;
  270. case NODE_INLINE_CODE:
  271. strbuf_puts(html, "</code>");
  272. break;
  273. case NODE_LINK:
  274. strbuf_puts(html, "</a>");
  275. break;
  276. case NODE_STRONG:
  277. strbuf_puts(html, "</strong>");
  278. break;
  279. case NODE_EMPH:
  280. strbuf_puts(html, "</em>");
  281. break;
  282. default:
  283. break;
  284. }
  285. return tight;
  286. }
  287. char *cmark_render_html(cmark_node *root)
  288. {
  289. char *result;
  290. strbuf html = GH_BUF_INIT;
  291. node_to_html(&html, root);
  292. result = (char *)strbuf_detach(&html);
  293. strbuf_free(&html);
  294. return result;
  295. }