aboutsummaryrefslogtreecommitdiff
path: root/src/html/html.c
blob: 4fa79b1955145783a519c11e7111d03069b0f5e9 (plain)
  1. #include <stdlib.h>
  2. #include <stdio.h>
  3. #include <string.h>
  4. #include <assert.h>
  5. #include "config.h"
  6. #include "cmark.h"
  7. #include "node.h"
  8. #include "buffer.h"
  9. #include "html/houdini.h"
  10. // Functions to convert cmark_nodes to HTML strings.
  11. static bool
  12. finish_node(strbuf *html, cmark_node *node, bool tight);
  13. static void escape_html(strbuf *dest, const unsigned char *source, int length)
  14. {
  15. if (length < 0)
  16. length = strlen((char *)source);
  17. houdini_escape_html0(dest, source, (size_t)length, 0);
  18. }
  19. static void escape_href(strbuf *dest, const unsigned char *source, int length)
  20. {
  21. if (length < 0)
  22. length = strlen((char *)source);
  23. houdini_escape_href(dest, source, (size_t)length);
  24. }
  25. static inline void cr(strbuf *html)
  26. {
  27. if (html->size && html->ptr[html->size - 1] != '\n')
  28. strbuf_putc(html, '\n');
  29. }
  30. // Convert the inline children of a node to a plain string.
  31. static void inlines_to_plain_html(strbuf *html, cmark_node* node)
  32. {
  33. cmark_node* cur = node->first_child;
  34. if (cur == NULL) {
  35. return;
  36. }
  37. while (true) {
  38. switch(cur->type) {
  39. case NODE_STRING:
  40. case NODE_INLINE_CODE:
  41. case NODE_INLINE_HTML:
  42. escape_html(html, cur->as.literal.data, cur->as.literal.len);
  43. break;
  44. case NODE_LINEBREAK:
  45. case NODE_SOFTBREAK:
  46. strbuf_putc(html, ' ');
  47. break;
  48. default:
  49. break;
  50. }
  51. if (cur->first_child) {
  52. cur = cur->first_child;
  53. continue;
  54. }
  55. next_sibling:
  56. if (cur->next) {
  57. cur = cur->next;
  58. continue;
  59. }
  60. cur = cur->parent;
  61. if (cur == node) {
  62. break;
  63. }
  64. goto next_sibling;
  65. }
  66. }
  67. // Convert a cmark_node to HTML.
  68. static void node_to_html(strbuf *html, cmark_node *node)
  69. {
  70. cmark_node *cur;
  71. char start_header[] = "<h0>";
  72. bool tight = false;
  73. bool visit_children;
  74. if (node == NULL) {
  75. return;
  76. }
  77. cur = node;
  78. while (true) {
  79. // Only NODE_IMAGE wants to skip its children.
  80. visit_children = true;
  81. switch(cur->type) {
  82. case NODE_DOCUMENT:
  83. break;
  84. case NODE_PARAGRAPH:
  85. if (!tight) {
  86. cr(html);
  87. strbuf_puts(html, "<p>");
  88. }
  89. break;
  90. case NODE_BQUOTE:
  91. cr(html);
  92. strbuf_puts(html, "<blockquote>\n");
  93. // BQUOTE doesn't use any of the 'as' structs,
  94. // so the 'list' member can be used to store the
  95. // current value of 'tight'.
  96. cur->as.list.tight = tight;
  97. tight = false;
  98. break;
  99. case NODE_LIST_ITEM:
  100. cr(html);
  101. strbuf_puts(html, "<li>");
  102. break;
  103. case NODE_LIST: {
  104. cmark_list *list = &cur->as.list;
  105. bool tmp;
  106. // make sure a list starts at the beginning of the line:
  107. cr(html);
  108. if (list->list_type == CMARK_BULLET_LIST) {
  109. strbuf_puts(html, "<ul>\n");
  110. }
  111. else if (list->start == 1) {
  112. strbuf_puts(html, "<ol>\n");
  113. }
  114. else {
  115. strbuf_printf(html, "<ol start=\"%d\">\n",
  116. list->start);
  117. }
  118. // Store the current value of 'tight' by swapping.
  119. tmp = list->tight;
  120. list->tight = tight;
  121. tight = tmp;
  122. break;
  123. }
  124. case NODE_ATX_HEADER:
  125. case NODE_SETEXT_HEADER:
  126. cr(html);
  127. start_header[2] = '0' + cur->as.header.level;
  128. strbuf_puts(html, start_header);
  129. break;
  130. case NODE_INDENTED_CODE:
  131. case NODE_FENCED_CODE: {
  132. strbuf *info = &cur->as.code.info;
  133. cr(html);
  134. if (cur->type != NODE_FENCED_CODE
  135. || strbuf_len(info) == 0) {
  136. strbuf_puts(html, "<pre><code>");
  137. }
  138. else {
  139. int first_tag = strbuf_strchr(info, ' ', 0);
  140. if (first_tag < 0)
  141. first_tag = strbuf_len(info);
  142. strbuf_puts(html,
  143. "<pre><code class=\"language-");
  144. escape_html(html, info->ptr, first_tag);
  145. strbuf_puts(html, "\">");
  146. }
  147. escape_html(html, cur->string_content.ptr, cur->string_content.size);
  148. break;
  149. }
  150. case NODE_HTML:
  151. strbuf_put(html, cur->string_content.ptr, cur->string_content.size);
  152. break;
  153. case NODE_HRULE:
  154. strbuf_puts(html, "<hr />\n");
  155. break;
  156. case NODE_REFERENCE_DEF:
  157. break;
  158. case NODE_STRING:
  159. escape_html(html, cur->as.literal.data, cur->as.literal.len);
  160. break;
  161. case NODE_LINEBREAK:
  162. strbuf_puts(html, "<br />\n");
  163. break;
  164. case NODE_SOFTBREAK:
  165. strbuf_putc(html, '\n');
  166. break;
  167. case NODE_INLINE_CODE:
  168. strbuf_puts(html, "<code>");
  169. escape_html(html, cur->as.literal.data, cur->as.literal.len);
  170. break;
  171. case NODE_INLINE_HTML:
  172. strbuf_put(html,
  173. cur->as.literal.data,
  174. cur->as.literal.len);
  175. break;
  176. case NODE_LINK:
  177. strbuf_puts(html, "<a href=\"");
  178. if (cur->as.link.url)
  179. escape_href(html, cur->as.link.url, -1);
  180. if (cur->as.link.title) {
  181. strbuf_puts(html, "\" title=\"");
  182. escape_html(html, cur->as.link.title, -1);
  183. }
  184. strbuf_puts(html, "\">");
  185. break;
  186. case NODE_IMAGE:
  187. strbuf_puts(html, "<img src=\"");
  188. if (cur->as.link.url)
  189. escape_href(html, cur->as.link.url, -1);
  190. strbuf_puts(html, "\" alt=\"");
  191. inlines_to_plain_html(html, cur);
  192. if (cur->as.link.title) {
  193. strbuf_puts(html, "\" title=\"");
  194. escape_html(html, cur->as.link.title, -1);
  195. }
  196. strbuf_puts(html, "\" />");
  197. visit_children = false;
  198. break;
  199. case NODE_STRONG:
  200. strbuf_puts(html, "<strong>");
  201. break;
  202. case NODE_EMPH:
  203. strbuf_puts(html, "<em>");
  204. break;
  205. default:
  206. assert(false);
  207. }
  208. if (visit_children && cur->first_child) {
  209. cur = cur->first_child;
  210. continue;
  211. }
  212. next_sibling:
  213. tight = finish_node(html, cur, tight);
  214. if (cur == node) {
  215. break;
  216. }
  217. if (cur->next) {
  218. cur = cur->next;
  219. continue;
  220. }
  221. cur = cur->parent;
  222. goto next_sibling;
  223. }
  224. }
  225. // Returns the restored value of 'tight'.
  226. static bool
  227. finish_node(strbuf *html, cmark_node *node, bool tight)
  228. {
  229. char end_header[] = "</h0>\n";
  230. switch (node->type) {
  231. case NODE_PARAGRAPH:
  232. if (!tight) {
  233. strbuf_puts(html, "</p>\n");
  234. }
  235. break;
  236. case NODE_BQUOTE: {
  237. cmark_list *list = &node->as.list;
  238. strbuf_puts(html, "</blockquote>\n");
  239. // Restore old 'tight' value.
  240. tight = list->tight;
  241. list->tight = false;
  242. break;
  243. }
  244. case NODE_LIST_ITEM:
  245. strbuf_puts(html, "</li>\n");
  246. break;
  247. case NODE_LIST: {
  248. cmark_list *list = &node->as.list;
  249. bool tmp;
  250. strbuf_puts(html,
  251. list->list_type == CMARK_BULLET_LIST ?
  252. "</ul>\n" : "</ol>\n");
  253. // Restore old 'tight' value.
  254. tmp = tight;
  255. tight = list->tight;
  256. list->tight = tmp;
  257. break;
  258. }
  259. case NODE_ATX_HEADER:
  260. case NODE_SETEXT_HEADER:
  261. end_header[3] = '0' + node->as.header.level;
  262. strbuf_puts(html, end_header);
  263. break;
  264. case NODE_INDENTED_CODE:
  265. case NODE_FENCED_CODE:
  266. strbuf_puts(html, "</code></pre>\n");
  267. break;
  268. case NODE_INLINE_CODE:
  269. strbuf_puts(html, "</code>");
  270. break;
  271. case NODE_LINK:
  272. strbuf_puts(html, "</a>");
  273. break;
  274. case NODE_STRONG:
  275. strbuf_puts(html, "</strong>");
  276. break;
  277. case NODE_EMPH:
  278. strbuf_puts(html, "</em>");
  279. break;
  280. default:
  281. break;
  282. }
  283. return tight;
  284. }
  285. char *cmark_render_html(cmark_node *root)
  286. {
  287. char *result;
  288. strbuf html = GH_BUF_INIT;
  289. node_to_html(&html, root);
  290. result = (char *)strbuf_detach(&html);
  291. strbuf_free(&html);
  292. return result;
  293. }