aboutsummaryrefslogtreecommitdiff
path: root/api_test/main.c
blob: d341246325ac0e57bb9ed8b84f594e89b6119271 (plain)
  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #include <string.h>
  4. #define CMARK_NO_SHORT_NAMES
  5. #include "cmark.h"
  6. #include "node.h"
  7. #include "harness.h"
  8. #include "cplusplus.h"
  9. #define UTF8_REPL "\xEF\xBF\xBD"
  10. static const cmark_node_type node_types[] = {
  11. CMARK_NODE_DOCUMENT,
  12. CMARK_NODE_BLOCK_QUOTE,
  13. CMARK_NODE_LIST,
  14. CMARK_NODE_ITEM,
  15. CMARK_NODE_CODE_BLOCK,
  16. CMARK_NODE_HTML,
  17. CMARK_NODE_PARAGRAPH,
  18. CMARK_NODE_HEADER,
  19. CMARK_NODE_HRULE,
  20. CMARK_NODE_TEXT,
  21. CMARK_NODE_SOFTBREAK,
  22. CMARK_NODE_LINEBREAK,
  23. CMARK_NODE_CODE,
  24. CMARK_NODE_INLINE_HTML,
  25. CMARK_NODE_EMPH,
  26. CMARK_NODE_STRONG,
  27. CMARK_NODE_LINK,
  28. CMARK_NODE_IMAGE
  29. };
  30. static const int num_node_types = sizeof(node_types) / sizeof(*node_types);
  31. static void
  32. test_md_to_html(test_batch_runner *runner, const char *markdown,
  33. const char *expected_html, const char *msg);
  34. static void
  35. test_content(test_batch_runner *runner, cmark_node_type type,
  36. int allowed_content);
  37. static void
  38. test_char(test_batch_runner *runner, int valid, const char *utf8,
  39. const char *msg);
  40. static void
  41. test_incomplete_char(test_batch_runner *runner, const char *utf8,
  42. const char *msg);
  43. static void
  44. test_continuation_byte(test_batch_runner *runner, const char *utf8);
  45. static void
  46. version(test_batch_runner *runner)
  47. {
  48. INT_EQ(runner, cmark_version, CMARK_VERSION, "cmark_version");
  49. STR_EQ(runner, cmark_version_string, CMARK_VERSION_STRING,
  50. "cmark_version_string");
  51. }
  52. static void
  53. constructor(test_batch_runner *runner)
  54. {
  55. for (int i = 0; i < num_node_types; ++i) {
  56. cmark_node_type type = node_types[i];
  57. cmark_node *node = cmark_node_new(type);
  58. OK(runner, node != NULL, "new type %d", type);
  59. INT_EQ(runner, cmark_node_get_type(node), type,
  60. "get_type %d", type);
  61. switch (node->type) {
  62. case CMARK_NODE_HEADER:
  63. INT_EQ(runner, cmark_node_get_header_level(node), 1,
  64. "default header level is 1");
  65. node->as.header.level = 1;
  66. break;
  67. case CMARK_NODE_LIST:
  68. INT_EQ(runner, cmark_node_get_list_type(node),
  69. CMARK_BULLET_LIST,
  70. "default is list type is bullet");
  71. INT_EQ(runner, cmark_node_get_list_delim(node),
  72. CMARK_NO_DELIM,
  73. "default is list delim is NO_DELIM");
  74. INT_EQ(runner, cmark_node_get_list_start(node), 1,
  75. "default is list start is 1");
  76. INT_EQ(runner, cmark_node_get_list_tight(node), 0,
  77. "default is list is loose");
  78. break;
  79. default:
  80. break;
  81. }
  82. cmark_node_free(node);
  83. }
  84. }
  85. static void
  86. accessors(test_batch_runner *runner)
  87. {
  88. static const char markdown[] =
  89. "## Header\n"
  90. "\n"
  91. "* Item 1\n"
  92. "* Item 2\n"
  93. "\n"
  94. "2. Item 1\n"
  95. "\n"
  96. "3. Item 2\n"
  97. "\n"
  98. "\n"
  99. " code\n"
  100. "\n"
  101. "``` lang\n"
  102. "fenced\n"
  103. "```\n"
  104. "\n"
  105. "<div>html</div>\n"
  106. "\n"
  107. "[link](url 'title')\n";
  108. cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1);
  109. // Getters
  110. cmark_node *header = cmark_node_first_child(doc);
  111. INT_EQ(runner, cmark_node_get_header_level(header), 2,
  112. "get_header_level");
  113. cmark_node *bullet_list = cmark_node_next(header);
  114. INT_EQ(runner, cmark_node_get_list_type(bullet_list),
  115. CMARK_BULLET_LIST, "get_list_type bullet");
  116. INT_EQ(runner, cmark_node_get_list_tight(bullet_list), 1,
  117. "get_list_tight tight");
  118. cmark_node *ordered_list = cmark_node_next(bullet_list);
  119. INT_EQ(runner, cmark_node_get_list_type(ordered_list),
  120. CMARK_ORDERED_LIST, "get_list_type ordered");
  121. INT_EQ(runner, cmark_node_get_list_delim(ordered_list),
  122. CMARK_PERIOD_DELIM, "get_list_delim ordered");
  123. INT_EQ(runner, cmark_node_get_list_start(ordered_list), 2,
  124. "get_list_start");
  125. INT_EQ(runner, cmark_node_get_list_tight(ordered_list), 0,
  126. "get_list_tight loose");
  127. cmark_node *code = cmark_node_next(ordered_list);
  128. STR_EQ(runner, cmark_node_get_literal(code), "code\n",
  129. "get_literal indented code");
  130. cmark_node *fenced = cmark_node_next(code);
  131. STR_EQ(runner, cmark_node_get_literal(fenced), "fenced\n",
  132. "get_literal fenced code");
  133. STR_EQ(runner, cmark_node_get_fence_info(fenced), "lang",
  134. "get_fence_info");
  135. cmark_node *html = cmark_node_next(fenced);
  136. STR_EQ(runner, cmark_node_get_literal(html),
  137. "<div>html</div>\n", "get_literal html");
  138. cmark_node *paragraph = cmark_node_next(html);
  139. INT_EQ(runner, cmark_node_get_start_line(paragraph), 19,
  140. "get_start_line");
  141. INT_EQ(runner, cmark_node_get_start_column(paragraph), 1,
  142. "get_start_column");
  143. INT_EQ(runner, cmark_node_get_end_line(paragraph), 19,
  144. "get_end_line");
  145. cmark_node *link = cmark_node_first_child(paragraph);
  146. STR_EQ(runner, cmark_node_get_url(link), "url",
  147. "get_url");
  148. STR_EQ(runner, cmark_node_get_title(link), "title",
  149. "get_title");
  150. cmark_node *string = cmark_node_first_child(link);
  151. STR_EQ(runner, cmark_node_get_literal(string), "link",
  152. "get_literal string");
  153. // Setters
  154. OK(runner, cmark_node_set_header_level(header, 3),
  155. "set_header_level");
  156. OK(runner, cmark_node_set_list_type(bullet_list, CMARK_ORDERED_LIST),
  157. "set_list_type ordered");
  158. OK(runner, cmark_node_set_list_delim(bullet_list, CMARK_PAREN_DELIM),
  159. "set_list_delim paren");
  160. OK(runner, cmark_node_set_list_start(bullet_list, 3),
  161. "set_list_start");
  162. OK(runner, cmark_node_set_list_tight(bullet_list, 0),
  163. "set_list_tight loose");
  164. OK(runner, cmark_node_set_list_type(ordered_list, CMARK_BULLET_LIST),
  165. "set_list_type bullet");
  166. OK(runner, cmark_node_set_list_tight(ordered_list, 1),
  167. "set_list_tight tight");
  168. OK(runner, cmark_node_set_literal(code, "CODE\n"),
  169. "set_literal indented code");
  170. OK(runner, cmark_node_set_literal(fenced, "FENCED\n"),
  171. "set_literal fenced code");
  172. OK(runner, cmark_node_set_fence_info(fenced, "LANG"),
  173. "set_fence_info");
  174. OK(runner, cmark_node_set_literal(html, "<div>HTML</div>\n"),
  175. "set_literal html");
  176. OK(runner, cmark_node_set_url(link, "URL"),
  177. "set_url");
  178. OK(runner, cmark_node_set_title(link, "TITLE"),
  179. "set_title");
  180. OK(runner, cmark_node_set_literal(string, "LINK"),
  181. "set_literal string");
  182. char *rendered_html = cmark_render_html(doc, CMARK_OPT_DEFAULT);
  183. static const char expected_html[] =
  184. "<h3>Header</h3>\n"
  185. "<ol start=\"3\">\n"
  186. "<li>\n"
  187. "<p>Item 1</p>\n"
  188. "</li>\n"
  189. "<li>\n"
  190. "<p>Item 2</p>\n"
  191. "</li>\n"
  192. "</ol>\n"
  193. "<ul>\n"
  194. "<li>Item 1</li>\n"
  195. "<li>Item 2</li>\n"
  196. "</ul>\n"
  197. "<pre><code>CODE\n"
  198. "</code></pre>\n"
  199. "<pre><code class=\"language-LANG\">FENCED\n"
  200. "</code></pre>\n"
  201. "<div>HTML</div>\n"
  202. "<p><a href=\"URL\" title=\"TITLE\">LINK</a></p>\n";
  203. STR_EQ(runner, rendered_html, expected_html, "setters work");
  204. free(rendered_html);
  205. // Getter errors
  206. INT_EQ(runner, cmark_node_get_header_level(bullet_list), 0,
  207. "get_header_level error");
  208. INT_EQ(runner, cmark_node_get_list_type(header), CMARK_NO_LIST,
  209. "get_list_type error");
  210. INT_EQ(runner, cmark_node_get_list_start(code), 0,
  211. "get_list_start error");
  212. INT_EQ(runner, cmark_node_get_list_tight(fenced), 0,
  213. "get_list_tight error");
  214. OK(runner, cmark_node_get_literal(ordered_list) == NULL,
  215. "get_literal error");
  216. OK(runner, cmark_node_get_fence_info(paragraph) == NULL,
  217. "get_fence_info error");
  218. OK(runner, cmark_node_get_url(html) == NULL,
  219. "get_url error");
  220. OK(runner, cmark_node_get_title(header) == NULL,
  221. "get_title error");
  222. // Setter errors
  223. OK(runner, !cmark_node_set_header_level(bullet_list, 3),
  224. "set_header_level error");
  225. OK(runner, !cmark_node_set_list_type(header, CMARK_ORDERED_LIST),
  226. "set_list_type error");
  227. OK(runner, !cmark_node_set_list_start(code, 3),
  228. "set_list_start error");
  229. OK(runner, !cmark_node_set_list_tight(fenced, 0),
  230. "set_list_tight error");
  231. OK(runner, !cmark_node_set_literal(ordered_list, "content\n"),
  232. "set_literal error");
  233. OK(runner, !cmark_node_set_fence_info(paragraph, "lang"),
  234. "set_fence_info error");
  235. OK(runner, !cmark_node_set_url(html, "url"),
  236. "set_url error");
  237. OK(runner, !cmark_node_set_title(header, "title"),
  238. "set_title error");
  239. OK(runner, !cmark_node_set_header_level(header, 0),
  240. "set_header_level too small");
  241. OK(runner, !cmark_node_set_header_level(header, 7),
  242. "set_header_level too large");
  243. OK(runner, !cmark_node_set_list_type(bullet_list, CMARK_NO_LIST),
  244. "set_list_type invalid");
  245. OK(runner, !cmark_node_set_list_start(bullet_list, -1),
  246. "set_list_start negative");
  247. cmark_node_free(doc);
  248. }
  249. static void
  250. node_check(test_batch_runner *runner) {
  251. // Construct an incomplete tree.
  252. cmark_node *doc = cmark_node_new(CMARK_NODE_DOCUMENT);
  253. cmark_node *p1 = cmark_node_new(CMARK_NODE_PARAGRAPH);
  254. cmark_node *p2 = cmark_node_new(CMARK_NODE_PARAGRAPH);
  255. doc->first_child = p1;
  256. p1->next = p2;
  257. INT_EQ(runner, cmark_node_check(doc, NULL), 4, "node_check works");
  258. INT_EQ(runner, cmark_node_check(doc, NULL), 0,
  259. "node_check fixes tree");
  260. cmark_node_free(doc);
  261. }
  262. static void
  263. iterator(test_batch_runner *runner) {
  264. cmark_node *doc = cmark_parse_document("> a *b*\n\nc", 10);
  265. int parnodes = 0;
  266. cmark_event_type ev_type;
  267. cmark_iter *iter = cmark_iter_new(doc);
  268. cmark_node *cur;
  269. while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
  270. cur = cmark_iter_get_node(iter);
  271. if (cur->type == CMARK_NODE_PARAGRAPH &&
  272. ev_type == CMARK_EVENT_ENTER) {
  273. parnodes += 1;
  274. }
  275. }
  276. INT_EQ(runner, parnodes, 2, "iterate correctly counts paragraphs");
  277. cmark_iter_free(iter);
  278. cmark_node_free(doc);
  279. }
  280. static void
  281. iterator_delete(test_batch_runner *runner) {
  282. static const char md[] =
  283. "a *b* c\n"
  284. "\n"
  285. "* item1\n"
  286. "* item2\n"
  287. "\n"
  288. "a `b` c\n"
  289. "\n"
  290. "* item1\n"
  291. "* item2\n";
  292. cmark_node *doc = cmark_parse_document(md, sizeof(md) - 1);
  293. cmark_iter *iter = cmark_iter_new(doc);
  294. cmark_event_type ev_type;
  295. while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) {
  296. cmark_node *node = cmark_iter_get_node(iter);
  297. // Delete list, emph, and code nodes.
  298. if ((ev_type == CMARK_EVENT_EXIT &&
  299. node->type == CMARK_NODE_LIST) ||
  300. (ev_type == CMARK_EVENT_EXIT &&
  301. node->type == CMARK_NODE_EMPH) ||
  302. (ev_type == CMARK_EVENT_ENTER &&
  303. node->type == CMARK_NODE_CODE)) {
  304. cmark_node_free(node);
  305. }
  306. }
  307. char *html = cmark_render_html(doc, CMARK_OPT_DEFAULT);
  308. static const char expected[] =
  309. "<p>a c</p>\n"
  310. "<p>a c</p>\n";
  311. STR_EQ(runner, html, expected, "iterate and delete nodes");
  312. free(html);
  313. cmark_iter_free(iter);
  314. cmark_node_free(doc);
  315. }
  316. static void
  317. create_tree(test_batch_runner *runner)
  318. {
  319. char *html;
  320. cmark_node *doc = cmark_node_new(CMARK_NODE_DOCUMENT);
  321. cmark_node *p = cmark_node_new(CMARK_NODE_PARAGRAPH);
  322. OK(runner, !cmark_node_insert_before(doc, p),
  323. "insert before root fails");
  324. OK(runner, !cmark_node_insert_after(doc, p),
  325. "insert after root fails");
  326. OK(runner, cmark_node_append_child(doc, p), "append1");
  327. INT_EQ(runner, cmark_node_check(doc, NULL), 0, "append1 consistent");
  328. OK(runner, cmark_node_parent(p) == doc, "node_parent");
  329. cmark_node *emph = cmark_node_new(CMARK_NODE_EMPH);
  330. OK(runner, cmark_node_prepend_child(p, emph), "prepend1");
  331. INT_EQ(runner, cmark_node_check(doc, NULL), 0, "prepend1 consistent");
  332. cmark_node *str1 = cmark_node_new(CMARK_NODE_TEXT);
  333. cmark_node_set_literal(str1, "Hello, ");
  334. OK(runner, cmark_node_prepend_child(p, str1), "prepend2");
  335. INT_EQ(runner, cmark_node_check(doc, NULL), 0, "prepend2 consistent");
  336. cmark_node *str3 = cmark_node_new(CMARK_NODE_TEXT);
  337. cmark_node_set_literal(str3, "!");
  338. OK(runner, cmark_node_append_child(p, str3), "append2");
  339. INT_EQ(runner, cmark_node_check(doc, NULL), 0, "append2 consistent");
  340. cmark_node *str2 = cmark_node_new(CMARK_NODE_TEXT);
  341. cmark_node_set_literal(str2, "world");
  342. OK(runner, cmark_node_append_child(emph, str2), "append3");
  343. INT_EQ(runner, cmark_node_check(doc, NULL), 0, "append3 consistent");
  344. html = cmark_render_html(doc, CMARK_OPT_DEFAULT);
  345. STR_EQ(runner, html, "<p>Hello, <em>world</em>!</p>\n",
  346. "render_html");
  347. free(html);
  348. OK(runner, cmark_node_insert_before(str1, str3), "ins before1");
  349. INT_EQ(runner, cmark_node_check(doc, NULL), 0,
  350. "ins before1 consistent");
  351. // 31e
  352. OK(runner, cmark_node_first_child(p) == str3, "ins before1 works");
  353. OK(runner, cmark_node_insert_before(str1, emph), "ins before2");
  354. INT_EQ(runner, cmark_node_check(doc, NULL), 0,
  355. "ins before2 consistent");
  356. // 3e1
  357. OK(runner, cmark_node_last_child(p) == str1, "ins before2 works");
  358. OK(runner, cmark_node_insert_after(str1, str3), "ins after1");
  359. INT_EQ(runner, cmark_node_check(doc, NULL), 0,
  360. "ins after1 consistent");
  361. // e13
  362. OK(runner, cmark_node_next(str1) == str3, "ins after1 works");
  363. OK(runner, cmark_node_insert_after(str1, emph), "ins after2");
  364. INT_EQ(runner, cmark_node_check(doc, NULL), 0,
  365. "ins after2 consistent");
  366. // 1e3
  367. OK(runner, cmark_node_previous(emph) == str1, "ins after2 works");
  368. cmark_node_unlink(emph);
  369. html = cmark_render_html(doc, CMARK_OPT_DEFAULT);
  370. STR_EQ(runner, html, "<p>Hello, !</p>\n",
  371. "render_html after shuffling");
  372. free(html);
  373. cmark_node_free(doc);
  374. // TODO: Test that the contents of an unlinked inline are valid
  375. // after the parent block was destroyed. This doesn't work so far.
  376. cmark_node_free(emph);
  377. }
  378. void
  379. hierarchy(test_batch_runner *runner)
  380. {
  381. cmark_node *bquote1 = cmark_node_new(CMARK_NODE_BLOCK_QUOTE);
  382. cmark_node *bquote2 = cmark_node_new(CMARK_NODE_BLOCK_QUOTE);
  383. cmark_node *bquote3 = cmark_node_new(CMARK_NODE_BLOCK_QUOTE);
  384. OK(runner, cmark_node_append_child(bquote1, bquote2),
  385. "append bquote2");
  386. OK(runner, cmark_node_append_child(bquote2, bquote3),
  387. "append bquote3");
  388. OK(runner, !cmark_node_append_child(bquote3, bquote3),
  389. "adding a node as child of itself fails");
  390. OK(runner, !cmark_node_append_child(bquote3, bquote1),
  391. "adding a parent as child fails");
  392. cmark_node_free(bquote1);
  393. int max_node_type = CMARK_NODE_LAST_BLOCK > CMARK_NODE_LAST_INLINE
  394. ? CMARK_NODE_LAST_BLOCK : CMARK_NODE_LAST_INLINE;
  395. OK(runner, max_node_type < 32, "all node types < 32");
  396. int list_item_flag = 1 << CMARK_NODE_ITEM;
  397. int top_level_blocks =
  398. (1 << CMARK_NODE_BLOCK_QUOTE) |
  399. (1 << CMARK_NODE_LIST) |
  400. (1 << CMARK_NODE_CODE_BLOCK) |
  401. (1 << CMARK_NODE_HTML) |
  402. (1 << CMARK_NODE_PARAGRAPH) |
  403. (1 << CMARK_NODE_HEADER) |
  404. (1 << CMARK_NODE_HRULE);
  405. int all_inlines =
  406. (1 << CMARK_NODE_TEXT) |
  407. (1 << CMARK_NODE_SOFTBREAK) |
  408. (1 << CMARK_NODE_LINEBREAK) |
  409. (1 << CMARK_NODE_CODE) |
  410. (1 << CMARK_NODE_INLINE_HTML) |
  411. (1 << CMARK_NODE_EMPH) |
  412. (1 << CMARK_NODE_STRONG) |
  413. (1 << CMARK_NODE_LINK) |
  414. (1 << CMARK_NODE_IMAGE);
  415. test_content(runner, CMARK_NODE_DOCUMENT, top_level_blocks);
  416. test_content(runner, CMARK_NODE_BLOCK_QUOTE, top_level_blocks);
  417. test_content(runner, CMARK_NODE_LIST, list_item_flag);
  418. test_content(runner, CMARK_NODE_ITEM, top_level_blocks);
  419. test_content(runner, CMARK_NODE_CODE_BLOCK , 0);
  420. test_content(runner, CMARK_NODE_HTML, 0);
  421. test_content(runner, CMARK_NODE_PARAGRAPH, all_inlines);
  422. test_content(runner, CMARK_NODE_HEADER, all_inlines);
  423. test_content(runner, CMARK_NODE_HRULE, 0);
  424. test_content(runner, CMARK_NODE_TEXT, 0);
  425. test_content(runner, CMARK_NODE_SOFTBREAK, 0);
  426. test_content(runner, CMARK_NODE_LINEBREAK, 0);
  427. test_content(runner, CMARK_NODE_CODE, 0);
  428. test_content(runner, CMARK_NODE_INLINE_HTML, 0);
  429. test_content(runner, CMARK_NODE_EMPH, all_inlines);
  430. test_content(runner, CMARK_NODE_STRONG, all_inlines);
  431. test_content(runner, CMARK_NODE_LINK, all_inlines);
  432. test_content(runner, CMARK_NODE_IMAGE, all_inlines);
  433. }
  434. static void
  435. test_content(test_batch_runner *runner, cmark_node_type type,
  436. int allowed_content)
  437. {
  438. cmark_node *node = cmark_node_new(type);
  439. for (int i = 0; i < num_node_types; ++i) {
  440. cmark_node_type child_type = node_types[i];
  441. cmark_node *child = cmark_node_new(child_type);
  442. int got = cmark_node_append_child(node, child);
  443. int expected = (allowed_content >> child_type) & 1;
  444. INT_EQ(runner, got, expected,
  445. "add %d as child of %d", child_type, type);
  446. cmark_node_free(child);
  447. }
  448. cmark_node_free(node);
  449. }
  450. static void
  451. parser(test_batch_runner *runner)
  452. {
  453. test_md_to_html(runner, "No newline", "<p>No newline</p>\n",
  454. "document without trailing newline");
  455. }
  456. static void
  457. render_html(test_batch_runner *runner)
  458. {
  459. char *html;
  460. static const char markdown[] =
  461. "foo *bar*\n"
  462. "\n"
  463. "paragraph 2\n";
  464. cmark_node *doc = cmark_parse_document(markdown, sizeof(markdown) - 1);
  465. cmark_node *paragraph = cmark_node_first_child(doc);
  466. html = cmark_render_html(paragraph, CMARK_OPT_DEFAULT);
  467. STR_EQ(runner, html, "<p>foo <em>bar</em></p>\n",
  468. "render single paragraph");
  469. free(html);
  470. cmark_node *string = cmark_node_first_child(paragraph);
  471. html = cmark_render_html(string, CMARK_OPT_DEFAULT);
  472. STR_EQ(runner, html, "foo ", "render single inline");
  473. free(html);
  474. cmark_node *emph = cmark_node_next(string);
  475. html = cmark_render_html(emph, CMARK_OPT_DEFAULT);
  476. STR_EQ(runner, html, "<em>bar</em>", "render inline with children");
  477. free(html);
  478. cmark_node_free(doc);
  479. }
  480. static void
  481. utf8(test_batch_runner *runner)
  482. {
  483. // Ranges
  484. test_char(runner, 1, "\x01", "valid utf8 01");
  485. test_char(runner, 1, "\x7F", "valid utf8 7F");
  486. test_char(runner, 0, "\x80", "invalid utf8 80");
  487. test_char(runner, 0, "\xBF", "invalid utf8 BF");
  488. test_char(runner, 0, "\xC0\x80", "invalid utf8 C080");
  489. test_char(runner, 0, "\xC1\xBF", "invalid utf8 C1BF");
  490. test_char(runner, 1, "\xC2\x80", "valid utf8 C280");
  491. test_char(runner, 1, "\xDF\xBF", "valid utf8 DFBF");
  492. test_char(runner, 0, "\xE0\x80\x80", "invalid utf8 E08080");
  493. test_char(runner, 0, "\xE0\x9F\xBF", "invalid utf8 E09FBF");
  494. test_char(runner, 1, "\xE0\xA0\x80", "valid utf8 E0A080");
  495. test_char(runner, 1, "\xED\x9F\xBF", "valid utf8 ED9FBF");
  496. test_char(runner, 0, "\xED\xA0\x80", "invalid utf8 EDA080");
  497. test_char(runner, 0, "\xED\xBF\xBF", "invalid utf8 EDBFBF");
  498. test_char(runner, 0, "\xF0\x80\x80\x80", "invalid utf8 F0808080");
  499. test_char(runner, 0, "\xF0\x8F\xBF\xBF", "invalid utf8 F08FBFBF");
  500. test_char(runner, 1, "\xF0\x90\x80\x80", "valid utf8 F0908080");
  501. test_char(runner, 1, "\xF4\x8F\xBF\xBF", "valid utf8 F48FBFBF");
  502. test_char(runner, 0, "\xF4\x90\x80\x80", "invalid utf8 F4908080");
  503. test_char(runner, 0, "\xF7\xBF\xBF\xBF", "invalid utf8 F7BFBFBF");
  504. test_char(runner, 0, "\xF8", "invalid utf8 F8");
  505. test_char(runner, 0, "\xFF", "invalid utf8 FF");
  506. // Incomplete byte sequences at end of input
  507. test_incomplete_char(runner, "\xE0\xA0", "invalid utf8 E0A0");
  508. test_incomplete_char(runner, "\xF0\x90\x80", "invalid utf8 F09080");
  509. // Invalid continuation bytes
  510. test_continuation_byte(runner, "\xC2\x80");
  511. test_continuation_byte(runner, "\xE0\xA0\x80");
  512. test_continuation_byte(runner, "\xF0\x90\x80\x80");
  513. // Test string containing null character
  514. static const char string_with_null[] = "((((\0))))";
  515. char *html = cmark_markdown_to_html(string_with_null,
  516. sizeof(string_with_null) - 1);
  517. STR_EQ(runner, html, "<p>((((" UTF8_REPL "))))</p>\n",
  518. "utf8 with U+0000");
  519. free(html);
  520. }
  521. static void
  522. test_char(test_batch_runner *runner, int valid, const char *utf8,
  523. const char *msg)
  524. {
  525. char buf[20];
  526. sprintf(buf, "((((%s))))", utf8);
  527. if (valid) {
  528. char expected[30];
  529. sprintf(expected, "<p>((((%s))))</p>\n", utf8);
  530. test_md_to_html(runner, buf, expected, msg);
  531. }
  532. else {
  533. test_md_to_html(runner, buf, "<p>((((" UTF8_REPL "))))</p>\n",
  534. msg);
  535. }
  536. }
  537. static void
  538. test_incomplete_char(test_batch_runner *runner, const char *utf8,
  539. const char *msg)
  540. {
  541. char buf[20];
  542. sprintf(buf, "----%s", utf8);
  543. test_md_to_html(runner, buf, "<p>----" UTF8_REPL "</p>\n", msg);
  544. }
  545. static void
  546. test_continuation_byte(test_batch_runner *runner, const char *utf8)
  547. {
  548. int len = strlen(utf8);
  549. for (int pos = 1; pos < len; ++pos) {
  550. char buf[20];
  551. sprintf(buf, "((((%s))))", utf8);
  552. buf[4+pos] = '\x20';
  553. char expected[50];
  554. strcpy(expected, "<p>((((" UTF8_REPL "\x20");
  555. for (int i = pos + 1; i < len; ++i) {
  556. strcat(expected, UTF8_REPL);
  557. }
  558. strcat(expected, "))))</p>\n");
  559. char *html = cmark_markdown_to_html(buf, strlen(buf));
  560. STR_EQ(runner, html, expected,
  561. "invalid utf8 continuation byte %d/%d", pos, len);
  562. free(html);
  563. }
  564. }
  565. static void
  566. test_md_to_html(test_batch_runner *runner, const char *markdown,
  567. const char *expected_html, const char *msg)
  568. {
  569. char *html = cmark_markdown_to_html(markdown, strlen(markdown));
  570. STR_EQ(runner, html, expected_html, msg);
  571. free(html);
  572. }
  573. int main() {
  574. int retval;
  575. test_batch_runner *runner = test_batch_runner_new();
  576. version(runner);
  577. constructor(runner);
  578. accessors(runner);
  579. node_check(runner);
  580. iterator(runner);
  581. iterator_delete(runner);
  582. create_tree(runner);
  583. hierarchy(runner);
  584. parser(runner);
  585. render_html(runner);
  586. utf8(runner);
  587. test_cplusplus(runner);
  588. test_print_summary(runner);
  589. retval = test_ok(runner) ? 0 : 1;
  590. free(runner);
  591. return retval;
  592. }