aboutsummaryrefslogtreecommitdiff
path: root/src/html.c
blob: aeec5f1f1cc2e9607e8ee90212c95e0d0c037087 (plain)
  1. #include <stdlib.h>
  2. #include <stdio.h>
  3. #include <stdbool.h>
  4. #include "bstrlib.h"
  5. #include "stmd.h"
  6. #include "debug.h"
  7. #include "scanners.h"
  8. // Functions to convert block and inline lists to HTML strings.
  9. // Escape special characters in HTML. More efficient than
  10. // three calls to bfindreplace. If preserve_entities is set,
  11. // existing entities are left alone.
  12. static bstring escape_html(bstring inp, bool preserve_entities)
  13. {
  14. int pos = 0;
  15. int match;
  16. char c;
  17. bstring escapable = blk2bstr("&<>\"", 4);
  18. bstring ent;
  19. bstring s = bstrcpy(inp);
  20. while ((pos = binchr(s, pos, escapable)) != BSTR_ERR) {
  21. c = bchar(s,pos);
  22. switch (c) {
  23. case '<':
  24. bdelete(s, pos, 1);
  25. ent = blk2bstr("&lt;", 4);
  26. binsert(s, pos, ent, ' ');
  27. bdestroy(ent);
  28. pos += 4;
  29. break;
  30. case '>':
  31. bdelete(s, pos, 1);
  32. ent = blk2bstr("&gt;", 4);
  33. binsert(s, pos, ent, ' ');
  34. bdestroy(ent);
  35. pos += 4;
  36. break;
  37. case '&':
  38. if (preserve_entities && (match = scan_entity(s, pos))) {
  39. pos += match;
  40. } else {
  41. bdelete(s, pos, 1);
  42. ent = blk2bstr("&amp;", 5);
  43. binsert(s, pos, ent, ' ');
  44. bdestroy(ent);
  45. pos += 5;
  46. }
  47. break;
  48. case '"':
  49. bdelete(s, pos, 1);
  50. ent = blk2bstr("&quot;", 6);
  51. binsert(s, pos, ent, ' ');
  52. bdestroy(ent);
  53. pos += 6;
  54. break;
  55. default:
  56. bdelete(s, pos, 1);
  57. log_err("unexpected character %02x", c);
  58. }
  59. }
  60. bdestroy(escapable);
  61. return s;
  62. }
  63. static inline void cr(bstring buffer)
  64. {
  65. int c = bchar(buffer, blength(buffer) - 1);
  66. if (c != '\n' && c) {
  67. bconchar(buffer, '\n');
  68. }
  69. }
  70. // Convert a block list to HTML. Returns 0 on success, and sets result.
  71. extern int blocks_to_html(block* b, bstring* result, bool tight)
  72. {
  73. bstring contents = NULL;
  74. bstring escaped, escaped2;
  75. struct bstrList * info_words;
  76. struct ListData * data;
  77. bstring mbstart;
  78. bstring html = blk2bstr("", 0);
  79. while(b != NULL) {
  80. switch(b->tag) {
  81. case document:
  82. check(blocks_to_html(b->children, &contents, false) == 0,
  83. "error converting blocks to html");
  84. bformata(html, "%s", contents->data);
  85. bdestroy(contents);
  86. break;
  87. case paragraph:
  88. check(inlines_to_html(b->inline_content, &contents) == 0,
  89. "error converting inlines to html");
  90. if (tight) {
  91. bformata(html, "%s", contents->data);
  92. } else {
  93. cr(html);
  94. bformata(html, "<p>%s</p>", contents->data);
  95. cr(html);
  96. }
  97. bdestroy(contents);
  98. break;
  99. case block_quote:
  100. check(blocks_to_html(b->children, &contents, false) == 0,
  101. "error converting blocks to html");
  102. cr(html);
  103. bformata(html, "<blockquote>\n%s</blockquote>", contents->data);
  104. cr(html);
  105. bdestroy(contents);
  106. break;
  107. case list_item:
  108. check(blocks_to_html(b->children, &contents, tight) == 0,
  109. "error converting blocks to html");
  110. brtrimws(contents);
  111. cr(html);
  112. bformata(html, "<li>%s</li>", contents->data);
  113. cr(html);
  114. bdestroy(contents);
  115. break;
  116. case list:
  117. // make sure a list starts at the beginning of the line:
  118. cr(html);
  119. data = &(b->attributes.list_data);
  120. check(blocks_to_html(b->children, &contents, data->tight) == 0,
  121. "error converting blocks to html");
  122. mbstart = bformat(" start=\"%d\"", data->start);
  123. bformata(html, "<%s%s>\n%s</%s>",
  124. data->list_type == bullet ? "ul" : "ol",
  125. data->start == 1 ? "" : (char*) mbstart->data,
  126. contents->data,
  127. data->list_type == bullet ? "ul" : "ol");
  128. cr(html);
  129. bdestroy(contents);
  130. bdestroy(mbstart);
  131. break;
  132. case atx_header:
  133. case setext_header:
  134. check(inlines_to_html(b->inline_content, &contents) == 0,
  135. "error converting inlines to html");
  136. cr(html);
  137. bformata(html, "<h%d>%s</h%d>",
  138. b->attributes.header_level,
  139. contents->data,
  140. b->attributes.header_level);
  141. cr(html);
  142. bdestroy(contents);
  143. break;
  144. case indented_code:
  145. escaped = escape_html(b->string_content, false);
  146. cr(html);
  147. bformata(html, "<pre><code>%s</code></pre>", escaped->data);
  148. cr(html);
  149. bdestroy(escaped);
  150. break;
  151. case fenced_code:
  152. escaped = escape_html(b->string_content, false);
  153. cr(html);
  154. bformata(html, "<pre><code");
  155. if (blength(b->attributes.fenced_code_data.info) > 0) {
  156. escaped2 = escape_html(b->attributes.fenced_code_data.info, true);
  157. info_words = bsplit(escaped2, ' ');
  158. bformata(html, " class=\"language-%s\"", info_words->entry[0]->data);
  159. bdestroy(escaped2);
  160. bstrListDestroy(info_words);
  161. }
  162. bformata(html, ">%s</code></pre>", escaped->data);
  163. cr(html);
  164. bdestroy(escaped);
  165. break;
  166. case html_block:
  167. bformata(html, "%s", b->string_content->data);
  168. break;
  169. case hrule:
  170. bformata(html, "<hr />");
  171. cr(html);
  172. break;
  173. case reference_def:
  174. break;
  175. default:
  176. log_warn("block type %d not implemented\n", b->tag);
  177. break;
  178. }
  179. b = b->next;
  180. }
  181. *result = html;
  182. return 0;
  183. error:
  184. return -1;
  185. }
  186. // Convert an inline list to HTML. Returns 0 on success, and sets result.
  187. extern int inlines_to_html(inl* ils, bstring* result)
  188. {
  189. bstring contents = NULL;
  190. bstring html = blk2bstr("", 0);
  191. bstring mbtitle, escaped, escaped2;
  192. while(ils != NULL) {
  193. switch(ils->tag) {
  194. case str:
  195. escaped = escape_html(ils->content.literal, false);
  196. bformata(html, "%s", escaped->data);
  197. bdestroy(escaped);
  198. break;
  199. case linebreak:
  200. bformata(html, "<br />\n");
  201. break;
  202. case softbreak:
  203. bformata(html, "\n");
  204. break;
  205. case code:
  206. escaped = escape_html(ils->content.literal, false);
  207. bformata(html, "<code>%s</code>", escaped->data);
  208. bdestroy(escaped);
  209. break;
  210. case raw_html:
  211. case entity:
  212. bformata(html, "%s", ils->content.literal->data);
  213. break;
  214. case link:
  215. check(inlines_to_html(ils->content.inlines, &contents) == 0,
  216. "error converting inlines to html");
  217. if (blength(ils->content.linkable.title) > 0) {
  218. escaped = escape_html(ils->content.linkable.title, true);
  219. mbtitle = bformat(" title=\"%s\"", escaped->data);
  220. bdestroy(escaped);
  221. } else {
  222. mbtitle = blk2bstr("",0);
  223. }
  224. escaped = escape_html(ils->content.linkable.url, true);
  225. bformata(html, "<a href=\"%s\"%s>%s</a>",
  226. escaped->data,
  227. mbtitle->data,
  228. contents->data);
  229. bdestroy(escaped);
  230. bdestroy(mbtitle);
  231. bdestroy(contents);
  232. break;
  233. case image:
  234. check(inlines_to_html(ils->content.inlines, &contents) == 0,
  235. "error converting inlines to html");
  236. escaped = escape_html(ils->content.linkable.url, true);
  237. escaped2 = escape_html(contents, false);
  238. bdestroy(contents);
  239. bformata(html, "<img src=\"%s\" alt=\"%s\"",
  240. escaped->data, escaped2->data);
  241. bdestroy(escaped);
  242. bdestroy(escaped2);
  243. if (blength(ils->content.linkable.title) > 0) {
  244. escaped = escape_html(ils->content.linkable.title, true);
  245. bformata(html, " title=\"%s\"", escaped->data);
  246. bdestroy(escaped);
  247. }
  248. bformata(html, " />");
  249. break;
  250. case strong:
  251. check(inlines_to_html(ils->content.inlines, &contents) == 0,
  252. "error converting inlines to html");
  253. bformata(html, "<strong>%s</strong>", contents->data);
  254. bdestroy(contents);
  255. break;
  256. case emph:
  257. check(inlines_to_html(ils->content.inlines, &contents) == 0,
  258. "error converting inlines to html");
  259. bformata(html, "<em>%s</em>", contents->data);
  260. bdestroy(contents);
  261. break;
  262. }
  263. ils = ils->next;
  264. }
  265. *result = html;
  266. return 0;
  267. error:
  268. return -1;
  269. }