aboutsummaryrefslogtreecommitdiff
path: root/src/inlines.c
blob: 78ebaf4be06114f08e9505a0a94479d359559884 (plain)
  1. #include <stdlib.h>
  2. #include <string.h>
  3. #include <stdio.h>
  4. #include <stdbool.h>
  5. #include <ctype.h>
  6. #include "cmark.h"
  7. #include "html/houdini.h"
  8. #include "utf8.h"
  9. #include "scanners.h"
  10. #include "ast.h"
  11. #include "inlines.h"
  12. typedef struct DelimiterStack {
  13. struct DelimiterStack *previous;
  14. struct DelimiterStack *next;
  15. node_inl *first_inline;
  16. int delim_count;
  17. unsigned char delim_char;
  18. int position;
  19. bool can_open;
  20. bool can_close;
  21. } delimiter_stack;
  22. typedef struct Subject {
  23. chunk input;
  24. int pos;
  25. reference_map *refmap;
  26. delimiter_stack *delimiters;
  27. } subject;
  28. static node_inl *parse_inlines_from_subject(subject* subj);
  29. static int parse_inline(subject* subj, node_inl ** last);
  30. static void subject_from_buf(subject *e, strbuf *buffer, reference_map *refmap);
  31. static int subject_find_special_char(subject *subj);
  // Duplicate a NUL-terminated byte string into memory obtained from malloc().
  // Returns NULL when buf is NULL or when the allocation fails; otherwise
  // returns a pointer to the copy (terminator included).
  static unsigned char *bufdup(const unsigned char *buf)
  {
      if (buf == NULL) {
          return NULL;
      }

      // size_t holds the complete strlen() result (an int could truncate it);
      // the extra byte is the terminator.
      size_t size = strlen((const char *)buf) + 1;
      unsigned char *copy = malloc(size);
      if (copy != NULL) {
          memcpy(copy, buf, size);
      }
      return copy;
  }
  44. static void subject_from_buf(subject *e, strbuf *buffer, reference_map *refmap)
  45. {
  46. e->input.data = buffer->ptr;
  47. e->input.len = buffer->size;
  48. e->input.alloc = 0;
  49. e->pos = 0;
  50. e->refmap = refmap;
  51. e->delimiters = NULL;
  52. chunk_rtrim(&e->input);
  53. }
  // Predicate for take_while(): 1 when c is a backtick, 0 otherwise.
  static inline int isbacktick(int c)
  {
      return c == '`' ? 1 : 0;
  }
  58. static inline unsigned char peek_char(subject *subj)
  59. {
  60. return (subj->pos < subj->input.len) ? subj->input.data[subj->pos] : 0;
  61. }
  62. static inline unsigned char peek_at(subject *subj, int pos)
  63. {
  64. return subj->input.data[pos];
  65. }
  66. // Return true if there are more characters in the subject.
  67. static inline int is_eof(subject* subj)
  68. {
  69. return (subj->pos >= subj->input.len);
  70. }
  // Advance the subject by one position. Doesn't check for eof.
  // The expansion is wrapped in parentheses so that it behaves as a single
  // expression wherever it is used (e.g. `advance(s) * 2`).
  #define advance(subj) ((subj)->pos += 1)
  73. // Take characters while a predicate holds, and return a string.
  74. static inline chunk take_while(subject* subj, int (*f)(int))
  75. {
  76. unsigned char c;
  77. int startpos = subj->pos;
  78. int len = 0;
  79. while ((c = peek_char(subj)) && (*f)(c)) {
  80. advance(subj);
  81. len++;
  82. }
  83. return chunk_dup(&subj->input, startpos, len);
  84. }
  85. // Try to process a backtick code span that began with a
  86. // span of ticks of length openticklength length (already
  87. // parsed). Return 0 if you don't find matching closing
  88. // backticks, otherwise return the position in the subject
  89. // after the closing backticks.
  90. static int scan_to_closing_backticks(subject* subj, int openticklength)
  91. {
  92. // read non backticks
  93. unsigned char c;
  94. while ((c = peek_char(subj)) && c != '`') {
  95. advance(subj);
  96. }
  97. if (is_eof(subj)) {
  98. return 0; // did not find closing ticks, return 0
  99. }
  100. int numticks = 0;
  101. while (peek_char(subj) == '`') {
  102. advance(subj);
  103. numticks++;
  104. }
  105. if (numticks != openticklength){
  106. return(scan_to_closing_backticks(subj, openticklength));
  107. }
  108. return (subj->pos);
  109. }
  110. // Parse backtick code section or raw backticks, return an inline.
  111. // Assumes that the subject has a backtick at the current position.
  112. static node_inl* handle_backticks(subject *subj)
  113. {
  114. chunk openticks = take_while(subj, isbacktick);
  115. int startpos = subj->pos;
  116. int endpos = scan_to_closing_backticks(subj, openticks.len);
  117. if (endpos == 0) { // not found
  118. subj->pos = startpos; // rewind
  119. return make_str(openticks);
  120. } else {
  121. strbuf buf = GH_BUF_INIT;
  122. strbuf_set(&buf, subj->input.data + startpos, endpos - startpos - openticks.len);
  123. strbuf_trim(&buf);
  124. strbuf_normalize_whitespace(&buf);
  125. return make_code(chunk_buf_detach(&buf));
  126. }
  127. }
  128. // Scan ***, **, or * and return number scanned, or 0.
  129. // Advances position.
  130. static int scan_delims(subject* subj, unsigned char c, bool * can_open, bool * can_close)
  131. {
  132. int numdelims = 0;
  133. unsigned char char_before, char_after;
  134. char_before = subj->pos == 0 ? '\n' : peek_at(subj, subj->pos - 1);
  135. while (peek_char(subj) == c) {
  136. numdelims++;
  137. advance(subj);
  138. }
  139. char_after = peek_char(subj);
  140. *can_open = numdelims > 0 && !isspace(char_after);
  141. *can_close = numdelims > 0 && !isspace(char_before);
  142. if (c == '_') {
  143. *can_open = *can_open && !isalnum(char_before);
  144. *can_close = *can_close && !isalnum(char_after);
  145. }
  146. return numdelims;
  147. }
  148. /*
  149. static void print_delimiters(subject *subj)
  150. {
  151. delimiter_stack *tempstack;
  152. tempstack = subj->delimiters;
  153. while (tempstack != NULL) {
  154. printf("Item at %p: %d %d %d %d next(%p) prev(%p)\n",
  155. tempstack, tempstack->delim_count, tempstack->delim_char,
  156. tempstack->can_open, tempstack->can_close,
  157. tempstack->next, tempstack->previous);
  158. tempstack = tempstack->previous;
  159. }
  160. }
  161. */
  162. static void remove_delimiter(subject *subj, delimiter_stack *stack)
  163. {
  164. if (stack->previous != NULL) {
  165. stack->previous->next = stack->next;
  166. }
  167. if (stack->next == NULL) {
  168. // top of stack
  169. subj->delimiters = stack->previous;
  170. } else {
  171. stack->next->previous = stack->previous;
  172. }
  173. free(stack);
  174. }
  175. static delimiter_stack * push_delimiter(subject *subj,
  176. int numdelims,
  177. unsigned char c,
  178. bool can_open,
  179. bool can_close,
  180. node_inl *inl_text)
  181. {
  182. delimiter_stack *istack =
  183. (delimiter_stack*)malloc(sizeof(delimiter_stack));
  184. if (istack == NULL) {
  185. return NULL;
  186. }
  187. istack->delim_count = numdelims;
  188. istack->delim_char = c;
  189. istack->can_open = can_open;
  190. istack->can_close = can_close;
  191. istack->first_inline = inl_text;
  192. istack->previous = subj->delimiters;
  193. istack->next = NULL;
  194. if (istack->previous != NULL) {
  195. istack->previous->next = istack;
  196. }
  197. istack->position = subj->pos;
  198. return istack;
  199. }
  200. // Parse strong/emph or a fallback.
  201. // Assumes the subject has '_' or '*' at the current position.
  202. static node_inl* handle_strong_emph(subject* subj, unsigned char c, node_inl **last)
  203. {
  204. int numdelims;
  205. node_inl * inl_text;
  206. bool can_open, can_close;
  207. numdelims = scan_delims(subj, c, &can_open, &can_close);
  208. inl_text = make_str(chunk_dup(&subj->input, subj->pos - numdelims, numdelims));
  209. if (can_open || can_close) {
  210. subj->delimiters = push_delimiter(subj, numdelims, c, can_open, can_close,
  211. inl_text);
  212. }
  213. return inl_text;
  214. }
  215. static void process_emphasis(subject *subj, delimiter_stack *stack_bottom)
  216. {
  217. delimiter_stack *closer = subj->delimiters;
  218. delimiter_stack *opener, *tempstack, *nextstack;
  219. int use_delims;
  220. node_inl *inl, *tmp, *emph;
  221. // move back to first relevant delim.
  222. while (closer != NULL && closer->previous != stack_bottom) {
  223. closer = closer->previous;
  224. }
  225. // now move forward, looking for closers, and handling each
  226. while (closer != NULL) {
  227. if (closer->can_close &&
  228. (closer->delim_char == '*' || closer->delim_char == '_')) {
  229. // Now look backwards for first matching opener:
  230. opener = closer->previous;
  231. while (opener != NULL && opener != stack_bottom) {
  232. if (opener->delim_char == closer->delim_char &&
  233. opener->can_open) {
  234. break;
  235. }
  236. opener = opener->previous;
  237. }
  238. if (opener != NULL && opener != stack_bottom) {
  239. // calculate the actual number of delimeters used from this closer
  240. if (closer->delim_count < 3 || opener->delim_count < 3) {
  241. use_delims = closer->delim_count <= opener->delim_count ?
  242. closer->delim_count : opener->delim_count;
  243. } else { // closer and opener both have >= 3 delims
  244. use_delims = closer->delim_count % 2 == 0 ? 2 : 1;
  245. }
  246. inl = opener->first_inline;
  247. // remove used delimiters from stack elements and associated inlines.
  248. opener->delim_count -= use_delims;
  249. closer->delim_count -= use_delims;
  250. inl->content.literal.len = opener->delim_count;
  251. closer->first_inline->content.literal.len = closer->delim_count;
  252. // free delimiters between opener and closer
  253. tempstack = closer->previous;
  254. while (tempstack != NULL && tempstack != opener) {
  255. nextstack = tempstack->previous;
  256. remove_delimiter(subj, tempstack);
  257. tempstack = nextstack;
  258. }
  259. // create new emph or strong, and splice it in to our inlines
  260. // between the opener and closer
  261. emph = use_delims == 1 ? make_emph(inl->next) : make_strong(inl->next);
  262. emph->next = closer->first_inline;
  263. inl->next = emph;
  264. tmp = emph->content.inlines;
  265. while (tmp->next != NULL && tmp->next != closer->first_inline) {
  266. tmp = tmp->next;
  267. }
  268. tmp->next = NULL;
  269. // if opener has 0 delims, remove it and its associated inline
  270. if (opener->delim_count == 0) {
  271. // replace empty opener inline with emph
  272. chunk_free(&(inl->content.literal));
  273. inl->tag = emph->tag;
  274. inl->next = emph->next;
  275. inl->content.inlines = emph->content.inlines;
  276. free(emph);
  277. emph = inl;
  278. // remove opener from stack
  279. remove_delimiter(subj, opener);
  280. }
  281. // if closer has 0 delims, remove it and its associated inline
  282. if (closer->delim_count == 0) {
  283. // remove empty closer inline
  284. tmp = closer->first_inline;
  285. emph->next = tmp->next;
  286. tmp->next = NULL;
  287. cmark_free_inlines(tmp);
  288. // remove closer from stack
  289. tempstack = closer->next;
  290. remove_delimiter(subj, closer);
  291. closer = tempstack;
  292. }
  293. } else {
  294. closer = closer->next;
  295. }
  296. } else {
  297. closer = closer->next;
  298. }
  299. }
  300. // free all delimiters in stack down to stack_bottom:
  301. while (subj->delimiters != stack_bottom) {
  302. remove_delimiter(subj, subj->delimiters);
  303. }
  304. }
  305. // Parse backslash-escape or just a backslash, returning an inline.
  306. static node_inl* handle_backslash(subject *subj)
  307. {
  308. advance(subj);
  309. unsigned char nextchar = peek_char(subj);
  310. if (ispunct(nextchar)) { // only ascii symbols and newline can be escaped
  311. advance(subj);
  312. return make_str(chunk_dup(&subj->input, subj->pos - 1, 1));
  313. } else if (nextchar == '\n') {
  314. advance(subj);
  315. return make_linebreak();
  316. } else {
  317. return make_str(chunk_literal("\\"));
  318. }
  319. }
  320. // Parse an entity or a regular "&" string.
  321. // Assumes the subject has an '&' character at the current position.
  322. static node_inl* handle_entity(subject* subj)
  323. {
  324. strbuf ent = GH_BUF_INIT;
  325. size_t len;
  326. advance(subj);
  327. len = houdini_unescape_ent(&ent,
  328. subj->input.data + subj->pos,
  329. subj->input.len - subj->pos
  330. );
  331. if (len == 0)
  332. return make_str(chunk_literal("&"));
  333. subj->pos += len;
  334. return make_str(chunk_buf_detach(&ent));
  335. }
  336. // Like make_str, but parses entities.
  337. // Returns an inline sequence consisting of str and entity elements.
  338. static node_inl *make_str_with_entities(chunk *content)
  339. {
  340. strbuf unescaped = GH_BUF_INIT;
  341. if (houdini_unescape_html(&unescaped, content->data, (size_t)content->len)) {
  342. return make_str(chunk_buf_detach(&unescaped));
  343. } else {
  344. return make_str(*content);
  345. }
  346. }
  347. // Clean a URL: remove surrounding whitespace and surrounding <>,
  348. // and remove \ that escape punctuation.
  349. unsigned char *clean_url(chunk *url)
  350. {
  351. strbuf buf = GH_BUF_INIT;
  352. chunk_trim(url);
  353. if (url->len == 0)
  354. return NULL;
  355. if (url->data[0] == '<' && url->data[url->len - 1] == '>') {
  356. houdini_unescape_html_f(&buf, url->data + 1, url->len - 2);
  357. } else {
  358. houdini_unescape_html_f(&buf, url->data, url->len);
  359. }
  360. strbuf_unescape(&buf);
  361. return strbuf_detach(&buf);
  362. }
  363. unsigned char *clean_title(chunk *title)
  364. {
  365. strbuf buf = GH_BUF_INIT;
  366. unsigned char first, last;
  367. if (title->len == 0)
  368. return NULL;
  369. first = title->data[0];
  370. last = title->data[title->len - 1];
  371. // remove surrounding quotes if any:
  372. if ((first == '\'' && last == '\'') ||
  373. (first == '(' && last == ')') ||
  374. (first == '"' && last == '"')) {
  375. houdini_unescape_html_f(&buf, title->data + 1, title->len - 2);
  376. } else {
  377. houdini_unescape_html_f(&buf, title->data, title->len);
  378. }
  379. strbuf_unescape(&buf);
  380. return strbuf_detach(&buf);
  381. }
  382. // Parse an autolink or HTML tag.
  383. // Assumes the subject has a '<' character at the current position.
  384. static node_inl* handle_pointy_brace(subject* subj)
  385. {
  386. int matchlen = 0;
  387. chunk contents;
  388. advance(subj); // advance past first <
  389. // first try to match a URL autolink
  390. matchlen = scan_autolink_uri(&subj->input, subj->pos);
  391. if (matchlen > 0) {
  392. contents = chunk_dup(&subj->input, subj->pos, matchlen - 1);
  393. subj->pos += matchlen;
  394. return make_autolink(
  395. make_str_with_entities(&contents),
  396. contents, 0
  397. );
  398. }
  399. // next try to match an email autolink
  400. matchlen = scan_autolink_email(&subj->input, subj->pos);
  401. if (matchlen > 0) {
  402. contents = chunk_dup(&subj->input, subj->pos, matchlen - 1);
  403. subj->pos += matchlen;
  404. return make_autolink(
  405. make_str_with_entities(&contents),
  406. contents, 1
  407. );
  408. }
  409. // finally, try to match an html tag
  410. matchlen = scan_html_tag(&subj->input, subj->pos);
  411. if (matchlen > 0) {
  412. contents = chunk_dup(&subj->input, subj->pos - 1, matchlen + 1);
  413. subj->pos += matchlen;
  414. return make_raw_html(contents);
  415. }
  416. // if nothing matches, just return the opening <:
  417. return make_str(chunk_literal("<"));
  418. }
  419. // Parse a link label. Returns 1 if successful.
  420. // Note: unescaped brackets are not allowed in labels.
  421. // The label begins with `[` and ends with the first `]` character
  422. // encountered. Backticks in labels do not start code spans.
  423. static int link_label(subject* subj, chunk *raw_label)
  424. {
  425. int startpos = subj->pos;
  426. int length = 0;
  427. advance(subj); // advance past [
  428. unsigned char c;
  429. while ((c = peek_char(subj)) && c != '[' && c != ']') {
  430. if (c == '\\') {
  431. advance(subj);
  432. length++;
  433. if (ispunct(peek_char(subj))) {
  434. advance(subj);
  435. length++;
  436. }
  437. } else {
  438. advance(subj);
  439. length++;
  440. }
  441. if (length > MAX_LINK_LABEL_LENGTH) {
  442. goto noMatch;
  443. }
  444. }
  445. if (c == ']') { // match found
  446. *raw_label = chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1));
  447. advance(subj); // advance past ]
  448. return 1;
  449. }
  450. noMatch:
  451. subj->pos = startpos; // rewind
  452. return 0;
  453. }
  454. // Return a link, an image, or a literal close bracket.
  455. static node_inl* handle_close_bracket(subject* subj, node_inl **last)
  456. {
  457. int initial_pos;
  458. int starturl, endurl, starttitle, endtitle, endall;
  459. int n;
  460. int sps;
  461. reference *ref;
  462. bool is_image = false;
  463. chunk urlchunk, titlechunk;
  464. unsigned char *url, *title;
  465. delimiter_stack *opener;
  466. delimiter_stack *tempstack;
  467. node_inl *link_text;
  468. node_inl *inl;
  469. chunk raw_label;
  470. advance(subj); // advance past ]
  471. initial_pos = subj->pos;
  472. // look through stack of delimiters for a [ or !
  473. opener = subj->delimiters;
  474. while (opener) {
  475. if (opener->delim_char == '[' || opener->delim_char == '!') {
  476. break;
  477. }
  478. opener = opener->previous;
  479. }
  480. if (opener == NULL) {
  481. return make_str(chunk_literal("]"));
  482. }
  483. // If we got here, we matched a potential link/image text.
  484. is_image = opener->delim_char == '!';
  485. link_text = opener->first_inline->next;
  486. // Now we check to see if it's a link/image.
  487. // First, look for an inline link.
  488. if (peek_char(subj) == '(' &&
  489. ((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) &&
  490. ((n = scan_link_url(&subj->input, subj->pos + 1 + sps)) > -1)) {
  491. // try to parse an explicit link:
  492. starturl = subj->pos + 1 + sps; // after (
  493. endurl = starturl + n;
  494. starttitle = endurl + scan_spacechars(&subj->input, endurl);
  495. // ensure there are spaces btw url and title
  496. endtitle = (starttitle == endurl) ? starttitle :
  497. starttitle + scan_link_title(&subj->input, starttitle);
  498. endall = endtitle + scan_spacechars(&subj->input, endtitle);
  499. if (peek_at(subj, endall) == ')') {
  500. subj->pos = endall + 1;
  501. urlchunk = chunk_dup(&subj->input, starturl, endurl - starturl);
  502. titlechunk = chunk_dup(&subj->input, starttitle, endtitle - starttitle);
  503. url = clean_url(&urlchunk);
  504. title = clean_title(&titlechunk);
  505. chunk_free(&urlchunk);
  506. chunk_free(&titlechunk);
  507. goto match;
  508. } else {
  509. goto noMatch;
  510. }
  511. }
  512. // Next, look for a following [link label] that matches in refmap.
  513. // skip spaces
  514. subj->pos = subj->pos + scan_spacechars(&subj->input, subj->pos);
  515. raw_label = chunk_literal("");
  516. if (!link_label(subj, &raw_label) || raw_label.len == 0) {
  517. chunk_free(&raw_label);
  518. raw_label = chunk_dup(&subj->input, opener->position,
  519. initial_pos - opener->position - 1);
  520. }
  521. ref = reference_lookup(subj->refmap, &raw_label);
  522. chunk_free(&raw_label);
  523. if (ref != NULL) { // found
  524. url = bufdup(ref->url);
  525. title = bufdup(ref->title);
  526. goto match;
  527. } else {
  528. goto noMatch;
  529. }
  530. noMatch:
  531. // If we fall through to here, it means we didn't match a link:
  532. remove_delimiter(subj, opener); // remove this opener from delimiter stack
  533. subj->pos = initial_pos;
  534. return make_str(chunk_literal("]"));
  535. match:
  536. inl = opener->first_inline;
  537. inl->tag = is_image ? INL_IMAGE : INL_LINK;
  538. chunk_free(&inl->content.literal);
  539. inl->content.linkable.label = link_text;
  540. process_emphasis(subj, opener->previous);
  541. inl->content.linkable.url = url;
  542. inl->content.linkable.title = title;
  543. inl->next = NULL;
  544. *last = inl;
  545. // process_emphasis will remove this delimiter and all later ones.
  546. // Now, if we have a link, we also want to remove earlier link
  547. // delimiters. (This code can be removed if we decide to allow links
  548. // inside links.)
  549. if (!is_image) {
  550. opener = subj->delimiters;
  551. while (opener != NULL) {
  552. tempstack = opener->previous;
  553. if (opener->delim_char == '[') {
  554. remove_delimiter(subj, opener);
  555. }
  556. opener = tempstack;
  557. }
  558. }
  559. return NULL;
  560. }
  561. // Parse a hard or soft linebreak, returning an inline.
  562. // Assumes the subject has a newline at the current position.
  563. static node_inl* handle_newline(subject *subj)
  564. {
  565. int nlpos = subj->pos;
  566. // skip over newline
  567. advance(subj);
  568. // skip spaces at beginning of line
  569. while (peek_char(subj) == ' ') {
  570. advance(subj);
  571. }
  572. if (nlpos > 1 &&
  573. peek_at(subj, nlpos - 1) == ' ' &&
  574. peek_at(subj, nlpos - 2) == ' ') {
  575. return make_linebreak();
  576. } else {
  577. return make_softbreak();
  578. }
  579. }
  580. // Parse inlines til end of subject, returning inlines.
  581. extern node_inl* parse_inlines_from_subject(subject* subj)
  582. {
  583. node_inl* result = NULL;
  584. node_inl** last = &result;
  585. node_inl* first = NULL;
  586. while (!is_eof(subj) && parse_inline(subj, last)) {
  587. if (!first) {
  588. first = *last;
  589. }
  590. }
  591. process_emphasis(subj, NULL);
  592. return first;
  593. }
  594. static int subject_find_special_char(subject *subj)
  595. {
  596. // "\n\\`&_*[]<!"
  597. static const int8_t SPECIAL_CHARS[256] = {
  598. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
  599. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  600. 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
  601. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
  602. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  603. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,
  604. 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  605. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  606. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  607. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  608. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  609. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  610. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  611. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  612. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  613. 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
  614. int n = subj->pos + 1;
  615. while (n < subj->input.len) {
  616. if (SPECIAL_CHARS[subj->input.data[n]])
  617. return n;
  618. n++;
  619. }
  620. return subj->input.len;
  621. }
  622. // Parse an inline, advancing subject, and add it to last element.
  623. // Adjust tail to point to new last element of list.
  624. // Return 0 if no inline can be parsed, 1 otherwise.
  625. static int parse_inline(subject* subj, node_inl ** last)
  626. {
  627. node_inl* new = NULL;
  628. chunk contents;
  629. unsigned char c;
  630. int endpos;
  631. c = peek_char(subj);
  632. if (c == 0) {
  633. return 0;
  634. }
  635. switch(c){
  636. case '\n':
  637. new = handle_newline(subj);
  638. break;
  639. case '`':
  640. new = handle_backticks(subj);
  641. break;
  642. case '\\':
  643. new = handle_backslash(subj);
  644. break;
  645. case '&':
  646. new = handle_entity(subj);
  647. break;
  648. case '<':
  649. new = handle_pointy_brace(subj);
  650. break;
  651. case '*':
  652. case '_':
  653. new = handle_strong_emph(subj, c, last);
  654. break;
  655. case '[':
  656. advance(subj);
  657. new = make_str(chunk_literal("["));
  658. subj->delimiters = push_delimiter(subj, 1, '[', true, false, new);
  659. break;
  660. case ']':
  661. new = handle_close_bracket(subj, last);
  662. break;
  663. case '!':
  664. advance(subj);
  665. if (peek_char(subj) == '[') {
  666. advance(subj);
  667. new = make_str(chunk_literal("!["));
  668. subj->delimiters = push_delimiter(subj, 1, '!', false, true, new);
  669. } else {
  670. new = make_str(chunk_literal("!"));
  671. }
  672. break;
  673. default:
  674. endpos = subject_find_special_char(subj);
  675. contents = chunk_dup(&subj->input, subj->pos, endpos - subj->pos);
  676. subj->pos = endpos;
  677. // if we're at a newline, strip trailing spaces.
  678. if (peek_char(subj) == '\n') {
  679. chunk_rtrim(&contents);
  680. }
  681. new = make_str(contents);
  682. }
  683. if (*last == NULL) {
  684. *last = new;
  685. } else if (new) {
  686. cmark_append_inlines(*last, new);
  687. *last = new;
  688. }
  689. return 1;
  690. }
  691. extern node_inl* parse_inlines(strbuf *input, reference_map *refmap)
  692. {
  693. subject subj;
  694. subject_from_buf(&subj, input, refmap);
  695. return parse_inlines_from_subject(&subj);
  696. }
  697. // Parse zero or more space characters, including at most one newline.
  698. static void spnl(subject* subj)
  699. {
  700. bool seen_newline = false;
  701. while (peek_char(subj) == ' ' ||
  702. (!seen_newline &&
  703. (seen_newline = peek_char(subj) == '\n'))) {
  704. advance(subj);
  705. }
  706. }
  707. // Parse reference. Assumes string begins with '[' character.
  708. // Modify refmap if a reference is encountered.
  709. // Return 0 if no reference found, otherwise position of subject
  710. // after reference is parsed.
  711. int parse_reference_inline(strbuf *input, reference_map *refmap)
  712. {
  713. subject subj;
  714. chunk lab;
  715. chunk url;
  716. chunk title;
  717. int matchlen = 0;
  718. int beforetitle;
  719. subject_from_buf(&subj, input, NULL);
  720. // parse label:
  721. if (!link_label(&subj, &lab))
  722. return 0;
  723. // colon:
  724. if (peek_char(&subj) == ':') {
  725. advance(&subj);
  726. } else {
  727. return 0;
  728. }
  729. // parse link url:
  730. spnl(&subj);
  731. matchlen = scan_link_url(&subj.input, subj.pos);
  732. if (matchlen) {
  733. url = chunk_dup(&subj.input, subj.pos, matchlen);
  734. subj.pos += matchlen;
  735. } else {
  736. return 0;
  737. }
  738. // parse optional link_title
  739. beforetitle = subj.pos;
  740. spnl(&subj);
  741. matchlen = scan_link_title(&subj.input, subj.pos);
  742. if (matchlen) {
  743. title = chunk_dup(&subj.input, subj.pos, matchlen);
  744. subj.pos += matchlen;
  745. } else {
  746. subj.pos = beforetitle;
  747. title = chunk_literal("");
  748. }
  749. // parse final spaces and newline:
  750. while (peek_char(&subj) == ' ') {
  751. advance(&subj);
  752. }
  753. if (peek_char(&subj) == '\n') {
  754. advance(&subj);
  755. } else if (peek_char(&subj) != 0) {
  756. return 0;
  757. }
  758. // insert reference into refmap
  759. reference_create(refmap, &lab, &url, &title);
  760. return subj.pos;
  761. }