aboutsummaryrefslogtreecommitdiff
path: root/api_test/main.c
blob: af40a9f980afe56a071ca34d3a1103a52095a294 (plain)
  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #include <string.h>
  4. #define CMARK_NO_SHORT_NAMES
  5. #include "cmark.h"
  6. #include "node.h"
  7. #include "harness.h"
  8. #include "cplusplus.h"
  9. #define UTF8_REPL "\xEF\xBF\xBD"
  10. static const cmark_node_type node_types[] = {
  11. CMARK_NODE_DOCUMENT,
  12. CMARK_NODE_BLOCK_QUOTE,
  13. CMARK_NODE_LIST,
  14. CMARK_NODE_ITEM,
  15. CMARK_NODE_CODE_BLOCK,
  16. CMARK_NODE_HTML,
  17. CMARK_NODE_PARAGRAPH,
  18. CMARK_NODE_HEADER,
  19. CMARK_NODE_HRULE,
  20. CMARK_NODE_TEXT,
  21. CMARK_NODE_SOFTBREAK,
  22. CMARK_NODE_LINEBREAK,
  23. CMARK_NODE_CODE,
  24. CMARK_NODE_INLINE_HTML,
  25. CMARK_NODE_EMPH,
  26. CMARK_NODE_STRONG,
  27. CMARK_NODE_LINK,
  28. CMARK_NODE_IMAGE
  29. };
  30. static const int num_node_types = sizeof(node_types) / sizeof(*node_types);
  31. static void
  32. test_md_to_html(test_batch_runner *runner, const char *markdown,
  33. const char *expected_html, const char *msg);
  34. static void
  35. test_content(test_batch_runner *runner, cmark_node_type type,
  36. int allowed_content);
  37. static void
  38. test_char(test_batch_runner *runner, int valid, const char *utf8,
  39. const char *msg);
  40. static void
  41. test_incomplete_char(test_batch_runner *runner, const char *utf8,
  42. const char *msg);
  43. static void
  44. test_continuation_byte(test_batch_runner *runner, const char *utf8);
  45. static void
  46. constructor(test_batch_runner *runner)
  47. {
  48. for (int i = 0; i < num_node_types; ++i) {
  49. cmark_node_type type = node_types[i];
  50. cmark_node *node = cmark_node_new(type);
  51. OK(runner, node != NULL, "new type %d", type);
  52. INT_EQ(runner, cmark_node_get_type(node), type,
  53. "get_type %d", type);
  54. switch (node->type) {
  55. case CMARK_NODE_HEADER:
  56. INT_EQ(runner, cmark_node_get_header_level(node), 1,
  57. "default header level is 1");
  58. node->as.header.level = 1;
  59. break;
  60. case CMARK_NODE_LIST:
  61. INT_EQ(runner, cmark_node_get_list_type(node),
  62. CMARK_BULLET_LIST,
  63. "default is list type is bullet");
  64. INT_EQ(runner, cmark_node_get_list_delim(node),
  65. CMARK_NO_DELIM,
  66. "default is list delim is NO_DELIM");
  67. INT_EQ(runner, cmark_node_get_list_start(node), 1,
  68. "default is list start is 1");
  69. INT_EQ(runner, cmark_node_get_list_tight(node), 0,
  70. "default is list is loose");
  71. break;
  72. default:
  73. break;
  74. }
  75. cmark_node_free(node);
  76. }
  77. }
  78. static void
  79. accessors(test_batch_runner *runner)
  80. {
  81. static const char markdown[] =
  82. "## Header\n"
  83. "\n"
  84. "* Item 1\n"
  85. "* Item 2\n"
  86. "\n"
  87. "2. Item 1\n"
  88. "\n"
  89. "3. Item 2\n"
  90. "\n"
  91. "\n"
  92. " code\n"
  93. "\n"
  94. "``` lang\n"
  95. "fenced\n"
  96. "```\n"
  97. "\n"
  98. "<div>html</div>\n"
  99. "\n"
  100. "[link](url 'title')\n";
  101. cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1);
  102. // Getters
  103. cmark_node *header = cmark_node_first_child(doc);
  104. INT_EQ(runner, cmark_node_get_header_level(header), 2,
  105. "get_header_level");
  106. cmark_node *bullet_list = cmark_node_next(header);
  107. INT_EQ(runner, cmark_node_get_list_type(bullet_list),
  108. CMARK_BULLET_LIST, "get_list_type bullet");
  109. INT_EQ(runner, cmark_node_get_list_tight(bullet_list), 1,
  110. "get_list_tight tight");
  111. cmark_node *ordered_list = cmark_node_next(bullet_list);
  112. INT_EQ(runner, cmark_node_get_list_type(ordered_list),
  113. CMARK_ORDERED_LIST, "get_list_type ordered");
  114. INT_EQ(runner, cmark_node_get_list_delim(ordered_list),
  115. CMARK_PERIOD_DELIM, "get_list_delim ordered");
  116. INT_EQ(runner, cmark_node_get_list_start(ordered_list), 2,
  117. "get_list_start");
  118. INT_EQ(runner, cmark_node_get_list_tight(ordered_list), 0,
  119. "get_list_tight loose");
  120. cmark_node *code = cmark_node_next(ordered_list);
  121. STR_EQ(runner, cmark_node_get_literal(code), "code\n",
  122. "get_literal indented code");
  123. cmark_node *fenced = cmark_node_next(code);
  124. STR_EQ(runner, cmark_node_get_literal(fenced), "fenced\n",
  125. "get_literal fenced code");
  126. STR_EQ(runner, cmark_node_get_fence_info(fenced), "lang",
  127. "get_fence_info");
  128. cmark_node *html = cmark_node_next(fenced);
  129. STR_EQ(runner, cmark_node_get_literal(html),
  130. "<div>html</div>\n", "get_literal html");
  131. cmark_node *paragraph = cmark_node_next(html);
  132. INT_EQ(runner, cmark_node_get_start_line(paragraph), 19,
  133. "get_start_line");
  134. INT_EQ(runner, cmark_node_get_start_column(paragraph), 1,
  135. "get_start_column");
  136. INT_EQ(runner, cmark_node_get_end_line(paragraph), 19,
  137. "get_end_line");
  138. cmark_node *link = cmark_node_first_child(paragraph);
  139. STR_EQ(runner, cmark_node_get_url(link), "url",
  140. "get_url");
  141. STR_EQ(runner, cmark_node_get_title(link), "title",
  142. "get_title");
  143. cmark_node *string = cmark_node_first_child(link);
  144. STR_EQ(runner, cmark_node_get_literal(string), "link",
  145. "get_literal string");
  146. // Setters
  147. OK(runner, cmark_node_set_header_level(header, 3),
  148. "set_header_level");
  149. OK(runner, cmark_node_set_list_type(bullet_list, CMARK_ORDERED_LIST),
  150. "set_list_type ordered");
  151. OK(runner, cmark_node_set_list_delim(bullet_list, CMARK_PAREN_DELIM),
  152. "set_list_delim paren");
  153. OK(runner, cmark_node_set_list_start(bullet_list, 3),
  154. "set_list_start");
  155. OK(runner, cmark_node_set_list_tight(bullet_list, 0),
  156. "set_list_tight loose");
  157. OK(runner, cmark_node_set_list_type(ordered_list, CMARK_BULLET_LIST),
  158. "set_list_type bullet");
  159. OK(runner, cmark_node_set_list_tight(ordered_list, 1),
  160. "set_list_tight tight");
  161. OK(runner, cmark_node_set_literal(code, "CODE\n"),
  162. "set_literal indented code");
  163. OK(runner, cmark_node_set_literal(fenced, "FENCED\n"),
  164. "set_literal fenced code");
  165. OK(runner, cmark_node_set_fence_info(fenced, "LANG"),
  166. "set_fence_info");
  167. OK(runner, cmark_node_set_literal(html, "<div>HTML</div>\n"),
  168. "set_literal html");
  169. OK(runner, cmark_node_set_url(link, "URL"),
  170. "set_url");
  171. OK(runner, cmark_node_set_title(link, "TITLE"),
  172. "set_title");
  173. OK(runner, cmark_node_set_literal(string, "LINK"),
  174. "set_literal string");
  175. char *rendered_html = cmark_render_html(doc, CMARK_OPT_DEFAULT);
  176. static const char expected_html[] =
  177. "<h3>Header</h3>\n"
  178. "<ol start=\"3\">\n"
  179. "<li>\n"
  180. "<p>Item 1</p>\n"
  181. "</li>\n"
  182. "<li>\n"
  183. "<p>Item 2</p>\n"
  184. "</li>\n"
  185. "</ol>\n"
  186. "<ul>\n"
  187. "<li>Item 1</li>\n"
  188. "<li>Item 2</li>\n"
  189. "</ul>\n"
  190. "<pre><code>CODE\n"
  191. "</code></pre>\n"
  192. "<pre><code class=\"language-LANG\">FENCED\n"
  193. "</code></pre>\n"
  194. "<div>HTML</div>\n"
  195. "<p><a href=\"URL\" title=\"TITLE\">LINK</a></p>\n";
  196. STR_EQ(runner, rendered_html, expected_html, "setters work");
  197. free(rendered_html);
  198. // Getter errors
  199. INT_EQ(runner, cmark_node_get_header_level(bullet_list), 0,
  200. "get_header_level error");
  201. INT_EQ(runner, cmark_node_get_list_type(header), CMARK_NO_LIST,
  202. "get_list_type error");
  203. INT_EQ(runner, cmark_node_get_list_start(code), 0,
  204. "get_list_start error");
  205. INT_EQ(runner, cmark_node_get_list_tight(fenced), 0,
  206. "get_list_tight error");
  207. OK(runner, cmark_node_get_literal(ordered_list) == NULL,
  208. "get_literal error");
  209. OK(runner, cmark_node_get_fence_info(paragraph) == NULL,
  210. "get_fence_info error");
  211. OK(runner, cmark_node_get_url(html) == NULL,
  212. "get_url error");
  213. OK(runner, cmark_node_get_title(header) == NULL,
  214. "get_title error");
  215. // Setter errors
  216. OK(runner, !cmark_node_set_header_level(bullet_list, 3),
  217. "set_header_level error");
  218. OK(runner, !cmark_node_set_list_type(header, CMARK_ORDERED_LIST),
  219. "set_list_type error");
  220. OK(runner, !cmark_node_set_list_start(code, 3),
  221. "set_list_start error");
  222. OK(runner, !cmark_node_set_list_tight(fenced, 0),
  223. "set_list_tight error");
  224. OK(runner, !cmark_node_set_literal(ordered_list, "content\n"),
  225. "set_literal error");
  226. OK(runner, !cmark_node_set_fence_info(paragraph, "lang"),
  227. "set_fence_info error");
  228. OK(runner, !cmark_node_set_url(html, "url"),
  229. "set_url error");
  230. OK(runner, !cmark_node_set_title(header, "title"),
  231. "set_title error");
  232. OK(runner, !cmark_node_set_header_level(header, 0),
  233. "set_header_level too small");
  234. OK(runner, !cmark_node_set_header_level(header, 7),
  235. "set_header_level too large");
  236. OK(runner, !cmark_node_set_list_type(bullet_list, CMARK_NO_LIST),
  237. "set_list_type invalid");
  238. OK(runner, !cmark_node_set_list_start(bullet_list, -1),
  239. "set_list_start negative");
  240. cmark_node_free(doc);
  241. }
  242. static void
  243. node_check(test_batch_runner *runner) {
  244. // Construct an incomplete tree.
  245. cmark_node *doc = cmark_node_new(CMARK_NODE_DOCUMENT);
  246. cmark_node *p1 = cmark_node_new(CMARK_NODE_PARAGRAPH);
  247. cmark_node *p2 = cmark_node_new(CMARK_NODE_PARAGRAPH);
  248. doc->first_child = p1;
  249. p1->next = p2;
  250. INT_EQ(runner, cmark_node_check(doc, NULL), 4, "node_check works");
  251. INT_EQ(runner, cmark_node_check(doc, NULL), 0,
  252. "node_check fixes tree");
  253. cmark_node_free(doc);
  254. }
  255. static void
  256. iterator(test_batch_runner *runner) {
  257. cmark_node *doc = cmark_parse_document("> a *b*\n\nc", 10);
  258. int parnodes = 0;
  259. cmark_event_type ev_type;
  260. cmark_iter *iter = cmark_iter_new(doc);
  261. cmark_node *cur;
  262. while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
  263. cur = cmark_iter_get_node(iter);
  264. if (cur->type == CMARK_NODE_PARAGRAPH &&
  265. ev_type == CMARK_EVENT_ENTER) {
  266. parnodes += 1;
  267. }
  268. }
  269. INT_EQ(runner, parnodes, 2, "iterate correctly counts paragraphs");
  270. cmark_iter_free(iter);
  271. cmark_node_free(doc);
  272. }
  273. static void
  274. iterator_delete(test_batch_runner *runner) {
  275. static const char md[] =
  276. "a *b* c\n"
  277. "\n"
  278. "* item1\n"
  279. "* item2\n"
  280. "\n"
  281. "a `b` c\n"
  282. "\n"
  283. "* item1\n"
  284. "* item2\n";
  285. cmark_node *doc = cmark_parse_document(md, sizeof(md) - 1);
  286. cmark_iter *iter = cmark_iter_new(doc);
  287. cmark_event_type ev_type;
  288. while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
  289. cmark_node *node = cmark_iter_get_node(iter);
  290. // Delete list, emph, and code nodes.
  291. if ((ev_type == CMARK_EVENT_EXIT &&
  292. node->type == CMARK_NODE_LIST) ||
  293. (ev_type == CMARK_EVENT_EXIT &&
  294. node->type == CMARK_NODE_EMPH) ||
  295. (ev_type == CMARK_EVENT_ENTER &&
  296. node->type == CMARK_NODE_CODE)) {
  297. cmark_node_free(node);
  298. }
  299. }
  300. char *html = cmark_render_html(doc, CMARK_OPT_DEFAULT);
  301. static const char expected[] =
  302. "<p>a c</p>\n"
  303. "<p>a c</p>\n";
  304. STR_EQ(runner, html, expected, "iterate and delete nodes");
  305. free(html);
  306. cmark_iter_free(iter);
  307. cmark_node_free(doc);
  308. }
  309. static void
  310. create_tree(test_batch_runner *runner)
  311. {
  312. char *html;
  313. cmark_node *doc = cmark_node_new(CMARK_NODE_DOCUMENT);
  314. cmark_node *p = cmark_node_new(CMARK_NODE_PARAGRAPH);
  315. OK(runner, !cmark_node_insert_before(doc, p),
  316. "insert before root fails");
  317. OK(runner, !cmark_node_insert_after(doc, p),
  318. "insert after root fails");
  319. OK(runner, cmark_node_append_child(doc, p), "append1");
  320. INT_EQ(runner, cmark_node_check(doc, NULL), 0, "append1 consistent");
  321. OK(runner, cmark_node_parent(p) == doc, "node_parent");
  322. cmark_node *emph = cmark_node_new(CMARK_NODE_EMPH);
  323. OK(runner, cmark_node_prepend_child(p, emph), "prepend1");
  324. INT_EQ(runner, cmark_node_check(doc, NULL), 0, "prepend1 consistent");
  325. cmark_node *str1 = cmark_node_new(CMARK_NODE_TEXT);
  326. cmark_node_set_literal(str1, "Hello, ");
  327. OK(runner, cmark_node_prepend_child(p, str1), "prepend2");
  328. INT_EQ(runner, cmark_node_check(doc, NULL), 0, "prepend2 consistent");
  329. cmark_node *str3 = cmark_node_new(CMARK_NODE_TEXT);
  330. cmark_node_set_literal(str3, "!");
  331. OK(runner, cmark_node_append_child(p, str3), "append2");
  332. INT_EQ(runner, cmark_node_check(doc, NULL), 0, "append2 consistent");
  333. cmark_node *str2 = cmark_node_new(CMARK_NODE_TEXT);
  334. cmark_node_set_literal(str2, "world");
  335. OK(runner, cmark_node_append_child(emph, str2), "append3");
  336. INT_EQ(runner, cmark_node_check(doc, NULL), 0, "append3 consistent");
  337. html = cmark_render_html(doc, CMARK_OPT_DEFAULT);
  338. STR_EQ(runner, html, "<p>Hello, <em>world</em>!</p>\n",
  339. "render_html");
  340. free(html);
  341. OK(runner, cmark_node_insert_before(str1, str3), "ins before1");
  342. INT_EQ(runner, cmark_node_check(doc, NULL), 0,
  343. "ins before1 consistent");
  344. // 31e
  345. OK(runner, cmark_node_first_child(p) == str3, "ins before1 works");
  346. OK(runner, cmark_node_insert_before(str1, emph), "ins before2");
  347. INT_EQ(runner, cmark_node_check(doc, NULL), 0,
  348. "ins before2 consistent");
  349. // 3e1
  350. OK(runner, cmark_node_last_child(p) == str1, "ins before2 works");
  351. OK(runner, cmark_node_insert_after(str1, str3), "ins after1");
  352. INT_EQ(runner, cmark_node_check(doc, NULL), 0,
  353. "ins after1 consistent");
  354. // e13
  355. OK(runner, cmark_node_next(str1) == str3, "ins after1 works");
  356. OK(runner, cmark_node_insert_after(str1, emph), "ins after2");
  357. INT_EQ(runner, cmark_node_check(doc, NULL), 0,
  358. "ins after2 consistent");
  359. // 1e3
  360. OK(runner, cmark_node_previous(emph) == str1, "ins after2 works");
  361. cmark_node_unlink(emph);
  362. html = cmark_render_html(doc, CMARK_OPT_DEFAULT);
  363. STR_EQ(runner, html, "<p>Hello, !</p>\n",
  364. "render_html after shuffling");
  365. free(html);
  366. cmark_node_free(doc);
  367. // TODO: Test that the contents of an unlinked inline are valid
  368. // after the parent block was destroyed. This doesn't work so far.
  369. cmark_node_free(emph);
  370. }
  371. void
  372. hierarchy(test_batch_runner *runner)
  373. {
  374. cmark_node *bquote1 = cmark_node_new(CMARK_NODE_BLOCK_QUOTE);
  375. cmark_node *bquote2 = cmark_node_new(CMARK_NODE_BLOCK_QUOTE);
  376. cmark_node *bquote3 = cmark_node_new(CMARK_NODE_BLOCK_QUOTE);
  377. OK(runner, cmark_node_append_child(bquote1, bquote2),
  378. "append bquote2");
  379. OK(runner, cmark_node_append_child(bquote2, bquote3),
  380. "append bquote3");
  381. OK(runner, !cmark_node_append_child(bquote3, bquote3),
  382. "adding a node as child of itself fails");
  383. OK(runner, !cmark_node_append_child(bquote3, bquote1),
  384. "adding a parent as child fails");
  385. cmark_node_free(bquote1);
  386. int max_node_type = CMARK_NODE_LAST_BLOCK > CMARK_NODE_LAST_INLINE
  387. ? CMARK_NODE_LAST_BLOCK : CMARK_NODE_LAST_INLINE;
  388. OK(runner, max_node_type < 32, "all node types < 32");
  389. int list_item_flag = 1 << CMARK_NODE_ITEM;
  390. int top_level_blocks =
  391. (1 << CMARK_NODE_BLOCK_QUOTE) |
  392. (1 << CMARK_NODE_LIST) |
  393. (1 << CMARK_NODE_CODE_BLOCK) |
  394. (1 << CMARK_NODE_HTML) |
  395. (1 << CMARK_NODE_PARAGRAPH) |
  396. (1 << CMARK_NODE_HEADER) |
  397. (1 << CMARK_NODE_HRULE);
  398. int all_inlines =
  399. (1 << CMARK_NODE_TEXT) |
  400. (1 << CMARK_NODE_SOFTBREAK) |
  401. (1 << CMARK_NODE_LINEBREAK) |
  402. (1 << CMARK_NODE_CODE) |
  403. (1 << CMARK_NODE_INLINE_HTML) |
  404. (1 << CMARK_NODE_EMPH) |
  405. (1 << CMARK_NODE_STRONG) |
  406. (1 << CMARK_NODE_LINK) |
  407. (1 << CMARK_NODE_IMAGE);
  408. test_content(runner, CMARK_NODE_DOCUMENT, top_level_blocks);
  409. test_content(runner, CMARK_NODE_BLOCK_QUOTE, top_level_blocks);
  410. test_content(runner, CMARK_NODE_LIST, list_item_flag);
  411. test_content(runner, CMARK_NODE_ITEM, top_level_blocks);
  412. test_content(runner, CMARK_NODE_CODE_BLOCK , 0);
  413. test_content(runner, CMARK_NODE_HTML, 0);
  414. test_content(runner, CMARK_NODE_PARAGRAPH, all_inlines);
  415. test_content(runner, CMARK_NODE_HEADER, all_inlines);
  416. test_content(runner, CMARK_NODE_HRULE, 0);
  417. test_content(runner, CMARK_NODE_TEXT, 0);
  418. test_content(runner, CMARK_NODE_SOFTBREAK, 0);
  419. test_content(runner, CMARK_NODE_LINEBREAK, 0);
  420. test_content(runner, CMARK_NODE_CODE, 0);
  421. test_content(runner, CMARK_NODE_INLINE_HTML, 0);
  422. test_content(runner, CMARK_NODE_EMPH, all_inlines);
  423. test_content(runner, CMARK_NODE_STRONG, all_inlines);
  424. test_content(runner, CMARK_NODE_LINK, all_inlines);
  425. test_content(runner, CMARK_NODE_IMAGE, all_inlines);
  426. }
  427. static void
  428. test_content(test_batch_runner *runner, cmark_node_type type,
  429. int allowed_content)
  430. {
  431. cmark_node *node = cmark_node_new(type);
  432. for (int i = 0; i < num_node_types; ++i) {
  433. cmark_node_type child_type = node_types[i];
  434. cmark_node *child = cmark_node_new(child_type);
  435. int got = cmark_node_append_child(node, child);
  436. int expected = (allowed_content >> child_type) & 1;
  437. INT_EQ(runner, got, expected,
  438. "add %d as child of %d", child_type, type);
  439. cmark_node_free(child);
  440. }
  441. cmark_node_free(node);
  442. }
  443. static void
  444. parser(test_batch_runner *runner)
  445. {
  446. test_md_to_html(runner, "No newline", "<p>No newline</p>\n",
  447. "document without trailing newline");
  448. }
  449. static void
  450. render_html(test_batch_runner *runner)
  451. {
  452. char *html;
  453. static const char markdown[] =
  454. "foo *bar*\n"
  455. "\n"
  456. "paragraph 2\n";
  457. cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1);
  458. cmark_node *paragraph = cmark_node_first_child(doc);
  459. html = cmark_render_html(paragraph, CMARK_OPT_DEFAULT);
  460. STR_EQ(runner, html, "<p>foo <em>bar</em></p>\n",
  461. "render single paragraph");
  462. free(html);
  463. cmark_node *string = cmark_node_first_child(paragraph);
  464. html = cmark_render_html(string, CMARK_OPT_DEFAULT);
  465. STR_EQ(runner, html, "foo ", "render single inline");
  466. free(html);
  467. cmark_node *emph = cmark_node_next(string);
  468. html = cmark_render_html(emph, CMARK_OPT_DEFAULT);
  469. STR_EQ(runner, html, "<em>bar</em>", "render inline with children");
  470. free(html);
  471. cmark_node_free(doc);
  472. }
  473. static void
  474. utf8(test_batch_runner *runner)
  475. {
  476. // Ranges
  477. test_char(runner, 1, "\x01", "valid utf8 01");
  478. test_char(runner, 1, "\x7F", "valid utf8 7F");
  479. test_char(runner, 0, "\x80", "invalid utf8 80");
  480. test_char(runner, 0, "\xBF", "invalid utf8 BF");
  481. test_char(runner, 0, "\xC0\x80", "invalid utf8 C080");
  482. test_char(runner, 0, "\xC1\xBF", "invalid utf8 C1BF");
  483. test_char(runner, 1, "\xC2\x80", "valid utf8 C280");
  484. test_char(runner, 1, "\xDF\xBF", "valid utf8 DFBF");
  485. test_char(runner, 0, "\xE0\x80\x80", "invalid utf8 E08080");
  486. test_char(runner, 0, "\xE0\x9F\xBF", "invalid utf8 E09FBF");
  487. test_char(runner, 1, "\xE0\xA0\x80", "valid utf8 E0A080");
  488. test_char(runner, 1, "\xED\x9F\xBF", "valid utf8 ED9FBF");
  489. test_char(runner, 0, "\xED\xA0\x80", "invalid utf8 EDA080");
  490. test_char(runner, 0, "\xED\xBF\xBF", "invalid utf8 EDBFBF");
  491. test_char(runner, 0, "\xF0\x80\x80\x80", "invalid utf8 F0808080");
  492. test_char(runner, 0, "\xF0\x8F\xBF\xBF", "invalid utf8 F08FBFBF");
  493. test_char(runner, 1, "\xF0\x90\x80\x80", "valid utf8 F0908080");
  494. test_char(runner, 1, "\xF4\x8F\xBF\xBF", "valid utf8 F48FBFBF");
  495. test_char(runner, 0, "\xF4\x90\x80\x80", "invalid utf8 F4908080");
  496. test_char(runner, 0, "\xF7\xBF\xBF\xBF", "invalid utf8 F7BFBFBF");
  497. test_char(runner, 0, "\xF8", "invalid utf8 F8");
  498. test_char(runner, 0, "\xFF", "invalid utf8 FF");
  499. // Incomplete byte sequences at end of input
  500. test_incomplete_char(runner, "\xE0\xA0", "invalid utf8 E0A0");
  501. test_incomplete_char(runner, "\xF0\x90\x80", "invalid utf8 F09080");
  502. // Invalid continuation bytes
  503. test_continuation_byte(runner, "\xC2\x80");
  504. test_continuation_byte(runner, "\xE0\xA0\x80");
  505. test_continuation_byte(runner, "\xF0\x90\x80\x80");
  506. // Test string containing null character
  507. static const char string_with_null[] = "((((\0))))";
  508. char *html = cmark_markdown_to_html(string_with_null,
  509. sizeof(string_with_null) - 1);
  510. STR_EQ(runner, html, "<p>((((" UTF8_REPL "))))</p>\n",
  511. "utf8 with U+0000");
  512. free(html);
  513. }
  514. static void
  515. test_char(test_batch_runner *runner, int valid, const char *utf8,
  516. const char *msg)
  517. {
  518. char buf[20];
  519. sprintf(buf, "((((%s))))", utf8);
  520. if (valid) {
  521. char expected[30];
  522. sprintf(expected, "<p>((((%s))))</p>\n", utf8);
  523. test_md_to_html(runner, buf, expected, msg);
  524. }
  525. else {
  526. test_md_to_html(runner, buf, "<p>((((" UTF8_REPL "))))</p>\n",
  527. msg);
  528. }
  529. }
  530. static void
  531. test_incomplete_char(test_batch_runner *runner, const char *utf8,
  532. const char *msg)
  533. {
  534. char buf[20];
  535. sprintf(buf, "----%s", utf8);
  536. test_md_to_html(runner, buf, "<p>----" UTF8_REPL "</p>\n", msg);
  537. }
  538. static void
  539. test_continuation_byte(test_batch_runner *runner, const char *utf8)
  540. {
  541. int len = strlen(utf8);
  542. for (int pos = 1; pos < len; ++pos) {
  543. char buf[20];
  544. sprintf(buf, "((((%s))))", utf8);
  545. buf[4+pos] = '\x20';
  546. char expected[50];
  547. strcpy(expected, "<p>((((" UTF8_REPL "\x20");
  548. for (int i = pos + 1; i < len; ++i) {
  549. strcat(expected, UTF8_REPL);
  550. }
  551. strcat(expected, "))))</p>\n");
  552. char *html = cmark_markdown_to_html(buf, strlen(buf));
  553. STR_EQ(runner, html, expected,
  554. "invalid utf8 continuation byte %d/%d", pos, len);
  555. free(html);
  556. }
  557. }
  558. static void
  559. test_md_to_html(test_batch_runner *runner, const char *markdown,
  560. const char *expected_html, const char *msg)
  561. {
  562. char *html = cmark_markdown_to_html(markdown, strlen(markdown));
  563. STR_EQ(runner, html, expected_html, msg);
  564. free(html);
  565. }
  566. int main() {
  567. int retval;
  568. test_batch_runner *runner = test_batch_runner_new();
  569. constructor(runner);
  570. accessors(runner);
  571. node_check(runner);
  572. iterator(runner);
  573. iterator_delete(runner);
  574. create_tree(runner);
  575. hierarchy(runner);
  576. parser(runner);
  577. render_html(runner);
  578. utf8(runner);
  579. test_cplusplus(runner);
  580. test_print_summary(runner);
  581. retval = test_ok(runner) ? 0 : 1;
  582. free(runner);
  583. return retval;
  584. }