aboutsummaryrefslogtreecommitdiff
path: root/src/html/html.c
blob: e6971f8e4c644437cd25236100df549653a2ecc4 (plain)
  1. #include <stdlib.h>
  2. #include <stdio.h>
  3. #include <string.h>
  4. #include <assert.h>
  5. #include "config.h"
  6. #include "cmark.h"
  7. #include "node.h"
  8. #include "buffer.h"
  9. #include "html/houdini.h"
  10. // Functions to convert cmark_nodes to HTML strings.
  11. static bool
  12. finish_node(strbuf *html, cmark_node *node, bool tight);
  13. static void escape_html(strbuf *dest, const unsigned char *source, int length)
  14. {
  15. if (length < 0)
  16. length = strlen((char *)source);
  17. houdini_escape_html0(dest, source, (size_t)length, 0);
  18. }
  19. static void escape_href(strbuf *dest, const unsigned char *source, int length)
  20. {
  21. if (length < 0)
  22. length = strlen((char *)source);
  23. houdini_escape_href(dest, source, (size_t)length);
  24. }
  25. static inline void cr(strbuf *html)
  26. {
  27. if (html->size && html->ptr[html->size - 1] != '\n')
  28. strbuf_putc(html, '\n');
  29. }
  30. // Convert the inline children of a node to a plain string.
  31. static void inlines_to_plain_html(strbuf *html, cmark_node* node)
  32. {
  33. cmark_node* cur = node->first_child;
  34. if (cur == NULL) {
  35. return;
  36. }
  37. while (true) {
  38. switch(cur->type) {
  39. case NODE_STRING:
  40. case NODE_INLINE_CODE:
  41. case NODE_INLINE_HTML:
  42. escape_html(html, cur->as.literal.data, cur->as.literal.len);
  43. break;
  44. case NODE_LINEBREAK:
  45. case NODE_SOFTBREAK:
  46. strbuf_putc(html, ' ');
  47. break;
  48. default:
  49. break;
  50. }
  51. if (cur->first_child) {
  52. cur = cur->first_child;
  53. continue;
  54. }
  55. next_sibling:
  56. if (cur->next) {
  57. cur = cur->next;
  58. continue;
  59. }
  60. cur = cur->parent;
  61. if (cur == node) {
  62. break;
  63. }
  64. goto next_sibling;
  65. }
  66. }
  67. // Convert a cmark_node to HTML.
  68. static void node_to_html(strbuf *html, cmark_node *node)
  69. {
  70. cmark_node *cur;
  71. char start_header[] = "<h0>";
  72. bool tight = false;
  73. bool visit_children;
  74. if (node == NULL) {
  75. return;
  76. }
  77. cur = node;
  78. while (true) {
  79. // Only NODE_IMAGE wants to skip its children.
  80. visit_children = true;
  81. switch(cur->type) {
  82. case NODE_DOCUMENT:
  83. break;
  84. case NODE_PARAGRAPH:
  85. if (!tight) {
  86. cr(html);
  87. strbuf_puts(html, "<p>");
  88. }
  89. break;
  90. case NODE_BLOCK_QUOTE:
  91. cr(html);
  92. strbuf_puts(html, "<blockquote>\n");
  93. // BLOCK_QUOTE doesn't use any of the 'as' structs,
  94. // so the 'list' member can be used to store the
  95. // current value of 'tight'.
  96. cur->as.list.tight = tight;
  97. tight = false;
  98. break;
  99. case NODE_LIST_ITEM:
  100. cr(html);
  101. strbuf_puts(html, "<li>");
  102. break;
  103. case NODE_LIST: {
  104. cmark_list *list = &cur->as.list;
  105. bool tmp;
  106. // make sure a list starts at the beginning of the line:
  107. cr(html);
  108. if (list->list_type == CMARK_BULLET_LIST) {
  109. strbuf_puts(html, "<ul>\n");
  110. }
  111. else if (list->start == 1) {
  112. strbuf_puts(html, "<ol>\n");
  113. }
  114. else {
  115. strbuf_printf(html, "<ol start=\"%d\">\n",
  116. list->start);
  117. }
  118. // Store the current value of 'tight' by swapping.
  119. tmp = list->tight;
  120. list->tight = tight;
  121. tight = tmp;
  122. break;
  123. }
  124. case NODE_HEADER:
  125. cr(html);
  126. start_header[2] = '0' + cur->as.header.level;
  127. strbuf_puts(html, start_header);
  128. break;
  129. case NODE_INDENTED_CODE:
  130. case NODE_FENCED_CODE: {
  131. strbuf *info = &cur->as.code.info;
  132. cr(html);
  133. if (cur->type != NODE_FENCED_CODE
  134. || strbuf_len(info) == 0) {
  135. strbuf_puts(html, "<pre><code>");
  136. }
  137. else {
  138. int first_tag = strbuf_strchr(info, ' ', 0);
  139. if (first_tag < 0)
  140. first_tag = strbuf_len(info);
  141. strbuf_puts(html,
  142. "<pre><code class=\"language-");
  143. escape_html(html, info->ptr, first_tag);
  144. strbuf_puts(html, "\">");
  145. }
  146. escape_html(html, cur->string_content.ptr, cur->string_content.size);
  147. break;
  148. }
  149. case NODE_HTML:
  150. cr(html);
  151. strbuf_put(html, cur->string_content.ptr, cur->string_content.size);
  152. break;
  153. case NODE_HRULE:
  154. cr(html);
  155. strbuf_puts(html, "<hr />\n");
  156. break;
  157. case NODE_REFERENCE_DEF:
  158. break;
  159. case NODE_STRING:
  160. escape_html(html, cur->as.literal.data, cur->as.literal.len);
  161. break;
  162. case NODE_LINEBREAK:
  163. strbuf_puts(html, "<br />\n");
  164. break;
  165. case NODE_SOFTBREAK:
  166. strbuf_putc(html, '\n');
  167. break;
  168. case NODE_INLINE_CODE:
  169. strbuf_puts(html, "<code>");
  170. escape_html(html, cur->as.literal.data, cur->as.literal.len);
  171. break;
  172. case NODE_INLINE_HTML:
  173. strbuf_put(html,
  174. cur->as.literal.data,
  175. cur->as.literal.len);
  176. break;
  177. case NODE_LINK:
  178. strbuf_puts(html, "<a href=\"");
  179. if (cur->as.link.url)
  180. escape_href(html, cur->as.link.url, -1);
  181. if (cur->as.link.title) {
  182. strbuf_puts(html, "\" title=\"");
  183. escape_html(html, cur->as.link.title, -1);
  184. }
  185. strbuf_puts(html, "\">");
  186. break;
  187. case NODE_IMAGE:
  188. strbuf_puts(html, "<img src=\"");
  189. if (cur->as.link.url)
  190. escape_href(html, cur->as.link.url, -1);
  191. strbuf_puts(html, "\" alt=\"");
  192. inlines_to_plain_html(html, cur);
  193. if (cur->as.link.title) {
  194. strbuf_puts(html, "\" title=\"");
  195. escape_html(html, cur->as.link.title, -1);
  196. }
  197. strbuf_puts(html, "\" />");
  198. visit_children = false;
  199. break;
  200. case NODE_STRONG:
  201. strbuf_puts(html, "<strong>");
  202. break;
  203. case NODE_EMPH:
  204. strbuf_puts(html, "<em>");
  205. break;
  206. default:
  207. assert(false);
  208. }
  209. if (visit_children && cur->first_child) {
  210. cur = cur->first_child;
  211. continue;
  212. }
  213. next_sibling:
  214. tight = finish_node(html, cur, tight);
  215. if (cur == node) {
  216. break;
  217. }
  218. if (cur->next) {
  219. cur = cur->next;
  220. continue;
  221. }
  222. cur = cur->parent;
  223. goto next_sibling;
  224. }
  225. }
  226. // Returns the restored value of 'tight'.
  227. static bool
  228. finish_node(strbuf *html, cmark_node *node, bool tight)
  229. {
  230. char end_header[] = "</h0>\n";
  231. switch (node->type) {
  232. case NODE_PARAGRAPH:
  233. if (!tight) {
  234. strbuf_puts(html, "</p>\n");
  235. }
  236. break;
  237. case NODE_BLOCK_QUOTE: {
  238. cmark_list *list = &node->as.list;
  239. strbuf_puts(html, "</blockquote>\n");
  240. // Restore old 'tight' value.
  241. tight = list->tight;
  242. list->tight = false;
  243. break;
  244. }
  245. case NODE_LIST_ITEM:
  246. strbuf_puts(html, "</li>\n");
  247. break;
  248. case NODE_LIST: {
  249. cmark_list *list = &node->as.list;
  250. bool tmp;
  251. strbuf_puts(html,
  252. list->list_type == CMARK_BULLET_LIST ?
  253. "</ul>\n" : "</ol>\n");
  254. // Restore old 'tight' value.
  255. tmp = tight;
  256. tight = list->tight;
  257. list->tight = tmp;
  258. break;
  259. }
  260. case NODE_HEADER:
  261. end_header[3] = '0' + node->as.header.level;
  262. strbuf_puts(html, end_header);
  263. break;
  264. case NODE_INDENTED_CODE:
  265. case NODE_FENCED_CODE:
  266. strbuf_puts(html, "</code></pre>\n");
  267. break;
  268. case NODE_INLINE_CODE:
  269. strbuf_puts(html, "</code>");
  270. break;
  271. case NODE_LINK:
  272. strbuf_puts(html, "</a>");
  273. break;
  274. case NODE_STRONG:
  275. strbuf_puts(html, "</strong>");
  276. break;
  277. case NODE_EMPH:
  278. strbuf_puts(html, "</em>");
  279. break;
  280. default:
  281. break;
  282. }
  283. return tight;
  284. }
  285. char *cmark_render_html(cmark_node *root)
  286. {
  287. char *result;
  288. strbuf html = GH_BUF_INIT;
  289. node_to_html(&html, root);
  290. result = (char *)strbuf_detach(&html);
  291. strbuf_free(&html);
  292. return result;
  293. }