/*
 * cgit navigation: about | summary | refs | log | tree | commit | diff
 * path: root/src/html.c
 * blob: 60229cc5d2dd5297b0725da3d7fd5c6a19e4a5e4 (plain)
 */
  1. #include <stdlib.h>
  2. #include <stdio.h>
  3. #include <string.h>
  4. #include <assert.h>
  5. #include "config.h"
  6. #include "cmark.h"
  7. #include "node.h"
  8. #include "buffer.h"
  9. #include "houdini.h"
  10. // Functions to convert cmark_nodes to HTML strings.
  11. static bool
  12. finish_node(strbuf *html, cmark_node *node, bool tight);
  13. static void escape_html(strbuf *dest, const unsigned char *source, int length)
  14. {
  15. if (length < 0)
  16. length = strlen((char *)source);
  17. houdini_escape_html0(dest, source, (size_t)length, 0);
  18. }
  19. static void escape_href(strbuf *dest, const unsigned char *source, int length)
  20. {
  21. if (length < 0)
  22. length = strlen((char *)source);
  23. houdini_escape_href(dest, source, (size_t)length);
  24. }
  25. static inline void cr(strbuf *html)
  26. {
  27. if (html->size && html->ptr[html->size - 1] != '\n')
  28. strbuf_putc(html, '\n');
  29. }
  30. // Convert the inline children of a node to a plain string.
  31. static void inlines_to_plain_html(strbuf *html, cmark_node* node)
  32. {
  33. cmark_node* cur = node->first_child;
  34. if (cur == NULL) {
  35. return;
  36. }
  37. while (true) {
  38. switch(cur->type) {
  39. case NODE_TEXT:
  40. case NODE_INLINE_CODE:
  41. case NODE_INLINE_HTML:
  42. escape_html(html, cur->as.literal.data, cur->as.literal.len);
  43. break;
  44. case NODE_LINEBREAK:
  45. case NODE_SOFTBREAK:
  46. strbuf_putc(html, ' ');
  47. break;
  48. default:
  49. break;
  50. }
  51. if (cur->first_child) {
  52. cur = cur->first_child;
  53. continue;
  54. }
  55. next_sibling:
  56. if (cur->next) {
  57. cur = cur->next;
  58. continue;
  59. }
  60. cur = cur->parent;
  61. if (cur == node) {
  62. break;
  63. }
  64. goto next_sibling;
  65. }
  66. }
  67. // Convert a cmark_node to HTML.
  68. static void node_to_html(strbuf *html, cmark_node *node)
  69. {
  70. cmark_node *cur;
  71. char start_header[] = "<h0>";
  72. bool tight = false;
  73. bool visit_children;
  74. strbuf *info;
  75. if (node == NULL) {
  76. return;
  77. }
  78. cur = node;
  79. while (true) {
  80. // Only NODE_IMAGE wants to skip its children.
  81. visit_children = true;
  82. switch(cur->type) {
  83. case NODE_DOCUMENT:
  84. break;
  85. case NODE_PARAGRAPH:
  86. if (!tight) {
  87. cr(html);
  88. strbuf_puts(html, "<p>");
  89. }
  90. break;
  91. case NODE_BLOCK_QUOTE:
  92. cr(html);
  93. strbuf_puts(html, "<blockquote>\n");
  94. // BLOCK_QUOTE doesn't use any of the 'as' structs,
  95. // so the 'list' member can be used to store the
  96. // current value of 'tight'.
  97. cur->as.list.tight = tight;
  98. tight = false;
  99. break;
  100. case NODE_LIST_ITEM:
  101. cr(html);
  102. strbuf_puts(html, "<li>");
  103. break;
  104. case NODE_LIST: {
  105. cmark_list *list = &cur->as.list;
  106. bool tmp;
  107. // make sure a list starts at the beginning of the line:
  108. cr(html);
  109. if (list->list_type == CMARK_BULLET_LIST) {
  110. strbuf_puts(html, "<ul>\n");
  111. }
  112. else if (list->start == 1) {
  113. strbuf_puts(html, "<ol>\n");
  114. }
  115. else {
  116. strbuf_printf(html, "<ol start=\"%d\">\n",
  117. list->start);
  118. }
  119. // Store the current value of 'tight' by swapping.
  120. tmp = list->tight;
  121. list->tight = tight;
  122. tight = tmp;
  123. break;
  124. }
  125. case NODE_HEADER:
  126. cr(html);
  127. start_header[2] = '0' + cur->as.header.level;
  128. strbuf_puts(html, start_header);
  129. break;
  130. case NODE_CODE_BLOCK:
  131. info = &cur->as.code.info;
  132. cr(html);
  133. if (&cur->as.code.fence_length == 0
  134. || strbuf_len(info) == 0) {
  135. strbuf_puts(html, "<pre><code>");
  136. }
  137. else {
  138. int first_tag = strbuf_strchr(info, ' ', 0);
  139. if (first_tag < 0)
  140. first_tag = strbuf_len(info);
  141. strbuf_puts(html,
  142. "<pre><code class=\"language-");
  143. escape_html(html, info->ptr, first_tag);
  144. strbuf_puts(html, "\">");
  145. }
  146. escape_html(html, cur->string_content.ptr, cur->string_content.size);
  147. break;
  148. case NODE_HTML:
  149. cr(html);
  150. strbuf_put(html, cur->string_content.ptr, cur->string_content.size);
  151. break;
  152. case NODE_HRULE:
  153. cr(html);
  154. strbuf_puts(html, "<hr />\n");
  155. break;
  156. case NODE_REFERENCE_DEF:
  157. break;
  158. case NODE_TEXT:
  159. escape_html(html, cur->as.literal.data, cur->as.literal.len);
  160. break;
  161. case NODE_LINEBREAK:
  162. strbuf_puts(html, "<br />\n");
  163. break;
  164. case NODE_SOFTBREAK:
  165. strbuf_putc(html, '\n');
  166. break;
  167. case NODE_INLINE_CODE:
  168. strbuf_puts(html, "<code>");
  169. escape_html(html, cur->as.literal.data, cur->as.literal.len);
  170. break;
  171. case NODE_INLINE_HTML:
  172. strbuf_put(html,
  173. cur->as.literal.data,
  174. cur->as.literal.len);
  175. break;
  176. case NODE_LINK:
  177. strbuf_puts(html, "<a href=\"");
  178. if (cur->as.link.url)
  179. escape_href(html, cur->as.link.url, -1);
  180. if (cur->as.link.title) {
  181. strbuf_puts(html, "\" title=\"");
  182. escape_html(html, cur->as.link.title, -1);
  183. }
  184. strbuf_puts(html, "\">");
  185. break;
  186. case NODE_IMAGE:
  187. strbuf_puts(html, "<img src=\"");
  188. if (cur->as.link.url)
  189. escape_href(html, cur->as.link.url, -1);
  190. strbuf_puts(html, "\" alt=\"");
  191. inlines_to_plain_html(html, cur);
  192. if (cur->as.link.title) {
  193. strbuf_puts(html, "\" title=\"");
  194. escape_html(html, cur->as.link.title, -1);
  195. }
  196. strbuf_puts(html, "\" />");
  197. visit_children = false;
  198. break;
  199. case NODE_STRONG:
  200. strbuf_puts(html, "<strong>");
  201. break;
  202. case NODE_EMPH:
  203. strbuf_puts(html, "<em>");
  204. break;
  205. default:
  206. assert(false);
  207. }
  208. if (visit_children && cur->first_child) {
  209. cur = cur->first_child;
  210. continue;
  211. }
  212. next_sibling:
  213. tight = finish_node(html, cur, tight);
  214. if (cur == node) {
  215. break;
  216. }
  217. if (cur->next) {
  218. cur = cur->next;
  219. continue;
  220. }
  221. cur = cur->parent;
  222. goto next_sibling;
  223. }
  224. }
  225. // Returns the restored value of 'tight'.
  226. static bool
  227. finish_node(strbuf *html, cmark_node *node, bool tight)
  228. {
  229. char end_header[] = "</h0>\n";
  230. switch (node->type) {
  231. case NODE_PARAGRAPH:
  232. if (!tight) {
  233. strbuf_puts(html, "</p>\n");
  234. }
  235. break;
  236. case NODE_BLOCK_QUOTE: {
  237. cmark_list *list = &node->as.list;
  238. strbuf_puts(html, "</blockquote>\n");
  239. // Restore old 'tight' value.
  240. tight = list->tight;
  241. list->tight = false;
  242. break;
  243. }
  244. case NODE_LIST_ITEM:
  245. strbuf_puts(html, "</li>\n");
  246. break;
  247. case NODE_LIST: {
  248. cmark_list *list = &node->as.list;
  249. bool tmp;
  250. strbuf_puts(html,
  251. list->list_type == CMARK_BULLET_LIST ?
  252. "</ul>\n" : "</ol>\n");
  253. // Restore old 'tight' value.
  254. tmp = tight;
  255. tight = list->tight;
  256. list->tight = tmp;
  257. break;
  258. }
  259. case NODE_HEADER:
  260. end_header[3] = '0' + node->as.header.level;
  261. strbuf_puts(html, end_header);
  262. break;
  263. case NODE_CODE_BLOCK:
  264. strbuf_puts(html, "</code></pre>\n");
  265. break;
  266. case NODE_INLINE_CODE:
  267. strbuf_puts(html, "</code>");
  268. break;
  269. case NODE_LINK:
  270. strbuf_puts(html, "</a>");
  271. break;
  272. case NODE_STRONG:
  273. strbuf_puts(html, "</strong>");
  274. break;
  275. case NODE_EMPH:
  276. strbuf_puts(html, "</em>");
  277. break;
  278. default:
  279. break;
  280. }
  281. return tight;
  282. }
  283. char *cmark_render_html(cmark_node *root)
  284. {
  285. char *result;
  286. strbuf html = GH_BUF_INIT;
  287. node_to_html(&html, root);
  288. result = (char *)strbuf_detach(&html);
  289. strbuf_free(&html);
  290. return result;
  291. }