Merge pull request #1 from jgm/master

merge
author: Kārlis Gaņģis <Knagis@users.noreply.github.com> 2014-10-08 11:02:43 +0300
committer: Kārlis Gaņģis <Knagis@users.noreply.github.com> 2014-10-08 11:02:43 +0300
commit: 43b8646d444d6828014bdd38df956f5871d60efe (patch)
tree: 66a1b9f6bd38822ee0c01a097ba7465157a5480c /src/inlines.c
parent: 3567b844ca1fd7bec62801f8758545f7bd5cbaae (diff)
parent: daeb55edc7636deebc2a79621ea06c3548d67827 (diff)
1 files changed, 816 insertions, 861 deletions
diff --git a/src/inlines.c b/src/inlines.c
index f75c846..71d75e9 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -1,251 +1,216 @@
 #include <stdlib.h>
+#include <string.h>
 #include <stdio.h>
 #include <stdbool.h>
 #include <ctype.h>
-#include "bstrlib.h"
+
 #include "stmd.h"
-#include "uthash.h"
-#include "debug.h"
-#include "scanners.h"
+#include "html/houdini.h"
 #include "utf8.h"
+#include "scanners.h"
+#include "inlines.h"
+
+typedef struct Subject {
+	chunk input;
+	int pos;
+	int label_nestlevel;
+	reference_map *refmap;
+} subject;
+
+static node_inl *parse_chunk_inlines(chunk *chunk, reference_map *refmap);
+static node_inl *parse_inlines_while(subject* subj, int (*f)(subject*));
+static int parse_inline(subject* subj, node_inl ** last);
 
-extern void free_reference(reference *ref) {
-  bdestroy(ref->label);
-  bdestroy(ref->url);
-  bdestroy(ref->title);
-  free(ref);
-}
-
-extern void free_reference_map(reference **refmap) {
-  /* free the hash table contents */
-  reference *s;
-  reference *tmp;
-  if (refmap != NULL) {
-    HASH_ITER(hh, *refmap, s, tmp) {
-      HASH_DEL(*refmap, s);
-      free_reference(s);
-    }
-    free(refmap);
-  }
-}
-
-// normalize reference:  collapse internal whitespace to single space,
-// remove leading/trailing whitespace, case fold
-static bstring normalize_reference(bstring s)
-{
-  bstring normalized = case_fold(s);
-  int pos = 0;
-  int startpos;
-  char c;
-  while ((c = bchar(normalized, pos))) {
-    if (isspace(c)) {
-      startpos = pos;
-      // skip til next non-space
-      pos++;
-      while (isspace(bchar(s, pos))) {
-        pos++;
-      }
-      bdelete(normalized, startpos, pos - startpos);
-      binsertch(normalized, startpos, 1, ' ');
-      pos = startpos + 1;
-    }
-    pos++;
-  }
-  btrimws(normalized);
-  return normalized;
-}
-
-// Returns reference if refmap contains a reference with matching
-// label, otherwise NULL.
-extern reference* lookup_reference(reference** refmap, bstring lab)
-{
-  reference * ref = NULL;
-  bstring label = normalize_reference(lab);
-  if (refmap != NULL) {
-    HASH_FIND_STR(*refmap, (char*) label->data, ref);
-  }
-  bdestroy(label);
-  return ref;
-}
-
-extern reference* make_reference(bstring label, bstring url, bstring title)
-{
-  reference * ref;
-  ref = malloc(sizeof(reference));
-  ref->label = normalize_reference(label);
-  ref->url = bstrcpy(url);
-  ref->title = bstrcpy(title);
-  return ref;
-}
-
-extern void add_reference(reference** refmap, reference* ref)
-{
-  reference * t = NULL;
-  HASH_FIND(hh, *refmap, (char*) ref->label->data,
-            (unsigned) blength(ref->label), t);
-  if (t == NULL) {
-    HASH_ADD_KEYPTR(hh, *refmap, (char*) ref->label->data,
-                    (unsigned) blength(ref->label), ref);
-  } else {
-    free_reference(ref);  // we free this now since it won't be in the refmap
-  }
+static void subject_from_chunk(subject *e, chunk *chunk, reference_map *refmap);
+static void subject_from_buf(subject *e, strbuf *buffer, reference_map *refmap);
+static int subject_find_special_char(subject *subj);
+
+static unsigned char *bufdup(const unsigned char *buf)
+{
+	unsigned char *new = NULL;
+
+	if (buf) {
+		int len = strlen((char *)buf);
+		new = malloc(len + 1);
+		memcpy(new, buf, len + 1);
+	}
+
+	return new;
+}
+
+static inline node_inl *make_link_(node_inl *label, unsigned char *url, unsigned char *title)
+{
+	node_inl* e = (node_inl*) malloc(sizeof(node_inl));
+	e->tag = INL_LINK;
+	e->content.linkable.label = label;
+	e->content.linkable.url   = url;
+	e->content.linkable.title = title;
+	e->next = NULL;
+	return e;
+}
+
+inline static node_inl* make_ref_link(node_inl* label, reference *ref)
+{
+	return make_link_(label, bufdup(ref->url), bufdup(ref->title));
+}
+
+inline static node_inl* make_autolink(node_inl* label, chunk url, int is_email)
+{
+	return make_link_(label, clean_autolink(&url, is_email), NULL);
 }
 
 // Create an inline with a linkable string value.
-inline static inl* make_linkable(int t, inl* label, bstring url, bstring title)
+inline static node_inl* make_link(node_inl* label, chunk url, chunk title)
 {
-  inl* e = (inl*) malloc(sizeof(inl));
-  e->tag = t;
-  e->content.linkable.label = label;
-  e->content.linkable.url   = url;
-  e->content.linkable.title = title;
-  e->next = NULL;
-  return e;
+	return make_link_(label, clean_url(&url), clean_title(&title));
 }
 
-inline static inl* make_inlines(int t, inl* contents)
+inline static node_inl* make_inlines(int t, node_inl* contents)
 {
-  inl* e = (inl*) malloc(sizeof(inl));
-  e->tag = t;
-  e->content.inlines = contents;
-  e->next = NULL;
-  return e;
+	node_inl* e = (node_inl*) malloc(sizeof(node_inl));
+	e->tag = t;
+	e->content.inlines = contents;
+	e->next = NULL;
+	return e;
 }
 
 // Create an inline with a literal string value.
-inline static inl* make_literal(int t, bstring s)
+inline static node_inl* make_literal(int t, chunk s)
 {
-  inl* e = (inl*) malloc(sizeof(inl));
-  e->tag = t;
-  e->content.literal = s;
-  e->next = NULL;
-  return e;
+	node_inl* e = (node_inl*) malloc(sizeof(node_inl));
+	e->tag = t;
+	e->content.literal = s;
+	e->next = NULL;
+	return e;
 }
 
 // Create an inline with no value.
-inline static inl* make_simple(int t)
+inline static node_inl* make_simple(int t)
 {
-  inl* e = (inl*) malloc(sizeof(inl));
-  e->tag = t;
-  e->next = NULL;
-  return e;
+	node_inl* e = (node_inl*) malloc(sizeof(node_inl));
+	e->tag = t;
+	e->next = NULL;
+	return e;
 }
 
 // Macros for creating various kinds of inlines.
-#define make_str(s) make_literal(str, s)
-#define make_code(s) make_literal(code, s)
-#define make_raw_html(s) make_literal(raw_html, s)
-#define make_entity(s) make_literal(entity, s)
-#define make_linebreak() make_simple(linebreak)
-#define make_softbreak() make_simple(softbreak)
-#define make_link(label, url, title) make_linkable(link, label, url, title)
-#define make_image(alt, url, title) make_linkable(image, alt, url, title)
-#define make_emph(contents) make_inlines(emph, contents)
-#define make_strong(contents) make_inlines(strong, contents)
+#define make_str(s) make_literal(INL_STRING, s)
+#define make_code(s) make_literal(INL_CODE, s)
+#define make_raw_html(s) make_literal(INL_RAW_HTML, s)
+#define make_linebreak() make_simple(INL_LINEBREAK)
+#define make_softbreak() make_simple(INL_SOFTBREAK)
+#define make_emph(contents) make_inlines(INL_EMPH, contents)
+#define make_strong(contents) make_inlines(INL_STRONG, contents)
 
 // Free an inline list.
-extern void free_inlines(inl* e)
-{
-  inl * next;
-  while (e != NULL) {
-    switch (e->tag){
-    case str:
-    case raw_html:
-    case code:
-    case entity:
-      bdestroy(e->content.literal);
-      break;
-    case linebreak:
-    case softbreak:
-      break;
-    case link:
-    case image:
-      bdestroy(e->content.linkable.url);
-      bdestroy(e->content.linkable.title);
-      free_inlines(e->content.linkable.label);
-      break;
-    case emph:
-    case strong:
-      free_inlines(e->content.inlines);
-      break;
-    default:
-      break;
-    }
-    next = e->next;
-    free(e);
-    e = next;
-  }
+extern void free_inlines(node_inl* e)
+{
+	node_inl * next;
+	while (e != NULL) {
+		switch (e->tag){
+			case INL_STRING:
+			case INL_RAW_HTML:
+			case INL_CODE:
+				chunk_free(&e->content.literal);
+				break;
+			case INL_LINEBREAK:
+			case INL_SOFTBREAK:
+				break;
+			case INL_LINK:
+			case INL_IMAGE:
+				free(e->content.linkable.url);
+				free(e->content.linkable.title);
+				free_inlines(e->content.linkable.label);
+				break;
+			case INL_EMPH:
+			case INL_STRONG:
+				free_inlines(e->content.inlines);
+				break;
+			default:
+				break;
+		}
+		next = e->next;
+		free(e);
+		e = next;
+	}
 }
 
 // Append inline list b to the end of inline list a.
 // Return pointer to head of new list.
-inline static inl* append_inlines(inl* a, inl* b)
+inline static node_inl* append_inlines(node_inl* a, node_inl* b)
 {
-  if (a == NULL) {  // NULL acts like an empty list
-    return b;
-  }
-  inl* cur = a;
-  while (cur->next) {
-    cur = cur->next;
-  }
-  cur->next = b;
-  return a;
+	if (a == NULL) {  // NULL acts like an empty list
+		return b;
+	}
+	node_inl* cur = a;
+	while (cur->next) {
+		cur = cur->next;
+	}
+	cur->next = b;
+	return a;
 }
 
-// Make a 'subject' from an input string.
-static subject* make_subject(bstring s, reference** refmap)
+static void subject_from_buf(subject *e, strbuf *buffer, reference_map *refmap)
 {
-  subject* e = (subject*) malloc(sizeof(subject));
-  // remove final whitespace
-  brtrimws(s);
-  e->buffer = s;
-  e->pos = 0;
-  e->label_nestlevel = 0;
-  e->reference_map = refmap;
-  return e;
+	e->input.data = buffer->ptr;
+	e->input.len = buffer->size;
+	e->input.alloc = 0;
+	e->pos = 0;
+	e->label_nestlevel = 0;
+	e->refmap = refmap;
+
+	chunk_rtrim(&e->input);
+}
+
+static void subject_from_chunk(subject *e, chunk *chunk, reference_map *refmap)
+{
+	e->input.data = chunk->data;
+	e->input.len = chunk->len;
+	e->input.alloc = 0;
+	e->pos = 0;
+	e->label_nestlevel = 0;
+	e->refmap = refmap;
+
+	chunk_rtrim(&e->input);
 }
 
 inline static int isbacktick(int c)
 {
-  return (c == '`');
+	return (c == '`');
 }
 
-// Return the next character in the subject, without advancing.
-// Return 0 if at the end of the subject.
-#define peek_char(subj) bchar(subj->buffer, subj->pos)
+static inline unsigned char peek_char(subject *subj)
+{
+	return (subj->pos < subj->input.len) ? subj->input.data[subj->pos] : 0;
+}
+
+static inline unsigned char peek_at(subject *subj, int pos)
+{
+	return subj->input.data[pos];
+}
 
 // Return true if there are more characters in the subject.
 inline static int is_eof(subject* subj)
 {
-  return (subj->pos >= blength(subj->buffer));
+	return (subj->pos >= subj->input.len);
 }
 
 // Advance the subject.  Doesn't check for eof.
-#define advance(subj) subj->pos += 1
+#define advance(subj) (subj)->pos += 1
 
 // Take characters while a predicate holds, and return a string.
-inline static bstring take_while(subject* subj, int (*f)(int))
+inline static chunk take_while(subject* subj, int (*f)(int))
 {
-  unsigned char c;
-  int startpos = subj->pos;
-  int len = 0;
-  while ((c = peek_char(subj)) && (*f)(c)) {
-    advance(subj);
-    len++;
-  }
-  return bmidstr(subj->buffer, startpos, len);
-}
+	unsigned char c;
+	int startpos = subj->pos;
+	int len = 0;
 
-// Take one character and return a string, or NULL if eof.
-inline static bstring take_one(subject* subj)
-{
-  int startpos = subj->pos;
-  if (is_eof(subj)){
-    return NULL;
-  } else {
-    advance(subj);
-    return bmidstr(subj->buffer, startpos, 1);
-  }
+	while ((c = peek_char(subj)) && (*f)(c)) {
+		advance(subj);
+		len++;
+	}
+
+	return chunk_dup(&subj->input, startpos, len);
 }
 
 // Try to process a backtick code span that began with a
@@ -255,381 +220,353 @@ inline static bstring take_one(subject* subj)
 // after the closing backticks.
 static int scan_to_closing_backticks(subject* subj, int openticklength)
 {
-  // read non backticks
-  char c;
-  while ((c = peek_char(subj)) && c != '`') {
-    advance(subj);
-  }
-  if (is_eof(subj)) {
-    return 0;  // did not find closing ticks, return 0
-  }
-  int numticks = 0;
-  while (peek_char(subj) == '`') {
-      advance(subj);
-      numticks++;
-  }
-  if (numticks != openticklength){
-    return(scan_to_closing_backticks(subj, openticklength));
-  }
-  return (subj->pos);
-}
-
-// Destructively modify bstring, collapsing consecutive
-// space and newline characters into a single space.
-static int normalize_whitespace(bstring s)
-{
-  bool last_char_was_space = false;
-  int pos = 0;
-  char c;
-  while ((c = bchar(s, pos))) {
-    switch (c) {
-    case ' ':
-      if (last_char_was_space) {
-        bdelete(s, pos, 1);
-      } else {
-        pos++;
-      }
-      last_char_was_space = true;
-      break;
-    case '\n':
-      if (last_char_was_space) {
-        bdelete(s, pos, 1);
-      } else {
-        bdelete(s, pos, 1);
-        binsertch(s, pos, 1, ' ');
-        pos++;
-      }
-      last_char_was_space = true;
-      break;
-    default:
-      pos++;
-      last_char_was_space = false;
-    }
-  }
-  return 0;
+	// read non backticks
+	char c;
+	while ((c = peek_char(subj)) && c != '`') {
+		advance(subj);
+	}
+	if (is_eof(subj)) {
+		return 0;  // did not find closing ticks, return 0
+	}
+	int numticks = 0;
+	while (peek_char(subj) == '`') {
+		advance(subj);
+		numticks++;
+	}
+	if (numticks != openticklength){
+		return(scan_to_closing_backticks(subj, openticklength));
+	}
+	return (subj->pos);
 }
 
 // Parse backtick code section or raw backticks, return an inline.
 // Assumes that the subject has a backtick at the current position.
-static inl* handle_backticks(subject *subj)
-{
-  bstring openticks = take_while(subj, isbacktick);
-  bstring result;
-  int ticklength = blength(openticks);
-  int startpos = subj->pos;
-  int endpos = scan_to_closing_backticks(subj, ticklength);
-  if (endpos == 0) { // not found
-    subj->pos = startpos; // rewind
-    return make_str(openticks);
-  } else {
-    bdestroy(openticks);
-    result = bmidstr(subj->buffer, startpos, endpos - startpos - ticklength);
-    btrimws(result);
-    normalize_whitespace(result);
-    return make_code(result);
-  }
+static node_inl* handle_backticks(subject *subj)
+{
+	chunk openticks = take_while(subj, isbacktick);
+	int startpos = subj->pos;
+	int endpos = scan_to_closing_backticks(subj, openticks.len);
+
+	if (endpos == 0) { // not found
+		subj->pos = startpos; // rewind
+		return make_str(openticks);
+	} else {
+		strbuf buf = GH_BUF_INIT;
+
+		strbuf_set(&buf, subj->input.data + startpos, endpos - startpos - openticks.len);
+		strbuf_trim(&buf);
+		strbuf_normalize_whitespace(&buf);
+
+		return make_code(chunk_buf_detach(&buf));
+	}
 }
 
 // Scan ***, **, or * and return number scanned, or 0.
 // Don't advance position.
 static int scan_delims(subject* subj, char c, bool * can_open, bool * can_close)
 {
-  int numdelims = 0;
-  char char_before, char_after;
-  int startpos = subj->pos;
-
-  char_before = subj->pos == 0 ? '\n' : bchar(subj->buffer, subj->pos - 1);
-  while (peek_char(subj) == c) {
-    numdelims++;
-    advance(subj);
-  }
-  char_after = peek_char(subj);
-  *can_open = numdelims > 0 && numdelims <= 3 && !isspace(char_after);
-  *can_close = numdelims > 0 && numdelims <= 3 && !isspace(char_before);
-  if (c == '_') {
-    *can_open = *can_open && !isalnum(char_before);
-    *can_close = *can_close && !isalnum(char_after);
-  }
-  subj->pos = startpos;
-  return numdelims;
+	int numdelims = 0;
+	char char_before, char_after;
+	int startpos = subj->pos;
+
+	char_before = subj->pos == 0 ? '\n' : peek_at(subj, subj->pos - 1);
+	while (peek_char(subj) == c) {
+		numdelims++;
+		advance(subj);
+	}
+	char_after = peek_char(subj);
+	*can_open = numdelims > 0 && numdelims <= 3 && !isspace(char_after);
+	*can_close = numdelims > 0 && numdelims <= 3 && !isspace(char_before);
+	if (c == '_') {
+		*can_open = *can_open && !isalnum(char_before);
+		*can_close = *can_close && !isalnum(char_after);
+	}
+	subj->pos = startpos;
+	return numdelims;
 }
 
 // Parse strong/emph or a fallback.
 // Assumes the subject has '_' or '*' at the current position.
-static inl* handle_strong_emph(subject* subj, char c)
-{
-  bool can_open, can_close;
-  inl * result = NULL;
-  inl ** last = malloc(sizeof(inl *));
-  inl * new;
-  inl * il;
-  inl * first_head = NULL;
-  inl * first_close = NULL;
-  int first_close_delims = 0;
-  int numdelims;
-
-  *last = NULL;
-
-  numdelims = scan_delims(subj, c, &can_open, &can_close);
-  subj->pos += numdelims;
-
-  new = make_str(bmidstr(subj->buffer, subj->pos - numdelims, numdelims));
-  *last = new;
-  first_head = new;
-  result = new;
-
-  if (!can_open || numdelims == 0) {
-    goto done;
-  }
-
-  switch (numdelims) {
-  case 1:
-    while (true) {
-      numdelims = scan_delims(subj, c, &can_open, &can_close);
-      if (numdelims >= 1 && can_close) {
-        subj->pos += 1;
-        first_head->tag = emph;
-        bdestroy(first_head->content.literal);
-        first_head->content.inlines = first_head->next;
-        first_head->next = NULL;
-        goto done;
-      } else {
-        if (!parse_inline(subj, last)) {
-          goto done;
-        }
-      }
-    }
-    break;
-  case 2:
-    while (true) {
-      numdelims = scan_delims(subj, c, &can_open, &can_close);
-      if (numdelims >= 2 && can_close) {
-        subj->pos += 2;
-        first_head->tag = strong;
-        bdestroy(first_head->content.literal);
-        first_head->content.inlines = first_head->next;
-        first_head->next = NULL;
-        goto done;
-      } else {
-        if (!parse_inline(subj, last)) {
-          goto done;
-        }
-      }
-    }
-    break;
-  case 3:
-    while (true) {
-      numdelims = scan_delims(subj, c, &can_open, &can_close);
-      if (can_close && numdelims >= 1 && numdelims <= 3 &&
-          numdelims != first_close_delims) {
-        new = make_str(bmidstr(subj->buffer, subj->pos, numdelims));
-        append_inlines(*last, new);
-        *last = new;
-
-        if (first_close_delims == 1 && numdelims > 2) {
-          numdelims = 2;
-        } else if (first_close_delims == 2) {
-          numdelims = 1;
-        } else if (numdelims == 3) {
-          // If we opened with ***, we interpret it as ** followed by *
-          // giving us <strong><em>
-          numdelims = 1;
-        }
-
-        subj->pos += numdelims;
-        if (first_close) {
-          first_head->tag = first_close_delims == 1 ? strong : emph;
-          bdestroy(first_head->content.literal);
-          first_head->content.inlines =
-            make_inlines(first_close_delims == 1 ? emph : strong,
-                         first_head->next);
-
-          il = first_head->next;
-          while (il->next && il->next != first_close) {
-            il = il->next;
-          }
-          il->next = NULL;
-
-          first_head->content.inlines->next = first_close->next;
-
-          il = first_head->content.inlines;
-          while (il->next && il->next != *last) {
-            il = il->next;
-          }
-          il->next = NULL;
-          free_inlines(*last);
-
-          first_close->next = NULL;
-          free_inlines(first_close);
-          first_head->next = NULL;
-          goto done;
-        } else {
-          first_close = *last;
-          first_close_delims = numdelims;
-        }
-      } else {
-        if (!parse_inline(subj, last)) {
-          goto done;
-        }
-      }
-    }
-    break;
-  default:
-    goto done;
-  }
-
- done:
-  free(last);
-  return result;
+static node_inl* handle_strong_emph(subject* subj, char c)
+{
+	bool can_open, can_close;
+	node_inl * result = NULL;
+	node_inl ** last = malloc(sizeof(node_inl *));
+	node_inl * new;
+	node_inl * il;
+	node_inl * first_head = NULL;
+	node_inl * first_close = NULL;
+	int first_close_delims = 0;
+	int numdelims;
+
+	*last = NULL;
+
+	numdelims = scan_delims(subj, c, &can_open, &can_close);
+	subj->pos += numdelims;
+
+	new = make_str(chunk_dup(&subj->input, subj->pos - numdelims, numdelims));
+	*last = new;
+	first_head = new;
+	result = new;
+
+	if (!can_open || numdelims == 0) {
+		goto done;
+	}
+
+	switch (numdelims) {
+		case 1:
+			while (true) {
+				numdelims = scan_delims(subj, c, &can_open, &can_close);
+				if (numdelims >= 1 && can_close) {
+					subj->pos += 1;
+					first_head->tag = INL_EMPH;
+					chunk_free(&first_head->content.literal);
+					first_head->content.inlines = first_head->next;
+					first_head->next = NULL;
+					goto done;
+				} else {
+					if (!parse_inline(subj, last)) {
+						goto done;
+					}
+				}
+			}
+			break;
+		case 2:
+			while (true) {
+				numdelims = scan_delims(subj, c, &can_open, &can_close);
+				if (numdelims >= 2 && can_close) {
+					subj->pos += 2;
+					first_head->tag = INL_STRONG;
+					chunk_free(&first_head->content.literal);
+					first_head->content.inlines = first_head->next;
+					first_head->next = NULL;
+					goto done;
+				} else {
+					if (!parse_inline(subj, last)) {
+						goto done;
+					}
+				}
+			}
+			break;
+		case 3:
+			while (true) {
+				numdelims = scan_delims(subj, c, &can_open, &can_close);
+				if (can_close && numdelims >= 1 && numdelims <= 3 &&
+						numdelims != first_close_delims) {
+					new = make_str(chunk_dup(&subj->input, subj->pos, numdelims));
+					append_inlines(*last, new);
+					*last = new;
+					if (first_close_delims == 1 && numdelims > 2) {
+						numdelims = 2;
+					} else if (first_close_delims == 2) {
+						numdelims = 1;
+					} else if (numdelims == 3) {
+						// If we opened with ***, we interpret it as ** followed by *
+						// giving us <strong><em>
+						numdelims = 1;
+					}
+					subj->pos += numdelims;
+					if (first_close) {
+						first_head->tag = first_close_delims == 1 ? INL_STRONG : INL_EMPH;
+						chunk_free(&first_head->content.literal);
+						first_head->content.inlines =
+							make_inlines(first_close_delims == 1 ? INL_EMPH : INL_STRONG,
+									first_head->next);
+
+						il = first_head->next;
+						while (il->next && il->next != first_close) {
+							il = il->next;
+						}
+						il->next = NULL;
+
+						first_head->content.inlines->next = first_close->next;
+
+						il = first_head->content.inlines;
+						while (il->next && il->next != *last) {
+							il = il->next;
+						}
+						il->next = NULL;
+						free_inlines(*last);
+
+						first_close->next = NULL;
+						free_inlines(first_close);
+						first_head->next = NULL;
+						goto done;
+					} else {
+						first_close = *last;
+						first_close_delims = numdelims;
+					}
+				} else {
+					if (!parse_inline(subj, last)) {
+						goto done;
+					}
+				}
+			}
+			break;
+		default:
+			goto done;
+	}
+
+done:
+	free(last);
+	return result;
 }
 
 // Parse backslash-escape or just a backslash, returning an inline.
-static inl* handle_backslash(subject *subj)
+static node_inl* handle_backslash(subject *subj)
 {
-  advance(subj);
-  unsigned char nextchar = peek_char(subj);
-  if (ispunct(nextchar)) {  // only ascii symbols and newline can be escaped
-    advance(subj);
-    return make_str(bformat("%c", nextchar));
-  } else if (nextchar == '\n') {
-    advance(subj);
-    return make_linebreak();
-  } else {
-    return make_str(bfromcstr("\\"));
-  }
+	advance(subj);
+	unsigned char nextchar = peek_char(subj);
+	if (ispunct(nextchar)) {  // only ascii symbols and newline can be escaped
+		advance(subj);
+		return make_str(chunk_dup(&subj->input, subj->pos - 1, 1));
+	} else if (nextchar == '\n') {
+		advance(subj);
+		return make_linebreak();
+	} else {
+		return make_str(chunk_literal("\\"));
+	}
 }
 
 // Parse an entity or a regular "&" string.
 // Assumes the subject has an '&' character at the current position.
-static inl* handle_entity(subject* subj)
+static node_inl* handle_entity(subject* subj)
 {
-  int match;
-  inl * result;
-  match = scan_entity(subj->buffer, subj->pos);
-  if (match) {
-    result = make_entity(bmidstr(subj->buffer, subj->pos, match));
-    subj->pos += match;
-  } else {
-    advance(subj);
-    result = make_str(bfromcstr("&"));
-  }
-  return result;
+	strbuf ent = GH_BUF_INIT;
+	size_t len;
+
+	advance(subj);
+
+	len = houdini_unescape_ent(&ent,
+		subj->input.data + subj->pos,
+		subj->input.len - subj->pos
+	);
+
+	if (len == 0)
+		return make_str(chunk_literal("&"));
+
+	subj->pos += len;
+	return make_str(chunk_buf_detach(&ent));
 }
 
 // Like make_str, but parses entities.
 // Returns an inline sequence consisting of str and entity elements.
-static inl * make_str_with_entities(bstring s)
-{
-  inl * result = NULL;
-  inl * new;
-  int searchpos;
-  char c;
-  subject * subj = make_subject(s, NULL);
-
-  while ((c = peek_char(subj))) {
-    switch (c) {
-    case '&':
-      new = handle_entity(subj);
-      break;
-    default:
-      searchpos = bstrchrp(subj->buffer, '&', subj->pos);
-      if (searchpos == BSTR_ERR) {
-        searchpos = blength(subj->buffer);
-      }
-      new = make_str(bmidstr(subj->buffer, subj->pos, searchpos - subj->pos));
-      subj->pos = searchpos;
-    }
-    result = append_inlines(result, new);
-  }
-  free(subj);
-  return result;
-}
-
-// Destructively unescape a string: remove backslashes before punctuation chars.
-extern int unescape(bstring url)
-{
-  // remove backslashes before punctuation chars:
-  int searchpos = 0;
-  while ((searchpos = bstrchrp(url, '\\', searchpos)) != BSTR_ERR) {
-    if (ispunct(bchar(url, searchpos + 1))) {
-      bdelete(url, searchpos, 1);
-    } else {
-      searchpos++;
-    }
-  }
-  return 0;
+static node_inl *make_str_with_entities(chunk *content)
+{
+	strbuf unescaped = GH_BUF_INIT;
+
+	if (houdini_unescape_html(&unescaped, content->data, (size_t)content->len)) {
+		return make_str(chunk_buf_detach(&unescaped));
+	} else {
+		return make_str(*content);
+	}
 }
 
 // Clean a URL: remove surrounding whitespace and surrounding <>,
 // and remove \ that escape punctuation.
-static int clean_url(bstring url)
+unsigned char *clean_url(chunk *url)
+{
+	strbuf buf = GH_BUF_INIT;
+
+	chunk_trim(url);
+
+	if (url->len == 0)
+		return NULL;
+
+	if (url->data[0] == '<' && url->data[url->len - 1] == '>') {
+		houdini_unescape_html_f(&buf, url->data + 1, url->len - 2);
+	} else {
+		houdini_unescape_html_f(&buf, url->data, url->len);
+	}
+
+	strbuf_unescape(&buf);
+	return strbuf_detach(&buf);
+}
+
+unsigned char *clean_autolink(chunk *url, int is_email)
 {
-  // remove surrounding <> if any:
-  int urllength = blength(url);
-  btrimws(url);
-  if (bchar(url, 0) == '<' && bchar(url, urllength - 1) == '>') {
-    bdelete(url, 0, 1);
-    bdelete(url, urllength - 2, 1);
-  }
-  unescape(url);
-  return 0;
+	strbuf buf = GH_BUF_INIT;
+
+	chunk_trim(url);
+
+	if (url->len == 0)
+		return NULL;
+
+	if (is_email)
+		strbuf_puts(&buf, "mailto:");
+
+	houdini_unescape_html_f(&buf, url->data, url->len);
+	return strbuf_detach(&buf);
 }
 
 // Clean a title: remove surrounding quotes and remove \ that escape punctuation.
-static int clean_title(bstring title)
+unsigned char *clean_title(chunk *title)
 {
-  // remove surrounding quotes if any:
-  int titlelength = blength(title);
-  if ((bchar(title, 0) == '\'' && bchar(title, titlelength - 1) == '\'') ||
-      (bchar(title, 0) == '(' && bchar(title, titlelength - 1) == ')') ||
-      (bchar(title, 0) == '"' && bchar(title, titlelength - 1) == '"')) {
-    bdelete(title, 0, 1);
-    bdelete(title, titlelength - 2, 1);
-  }
-  unescape(title);
-  return 0;
+	strbuf buf = GH_BUF_INIT;
+	unsigned char first, last;
+
+	if (title->len == 0)
+		return NULL;
+
+	first = title->data[0];
+	last = title->data[title->len - 1];
+
+	// remove surrounding quotes if any:
+	if ((first == '\'' && last == '\'') ||
+		(first == '(' && last == ')') ||
+		(first == '"' && last == '"')) {
+		houdini_unescape_html_f(&buf, title->data + 1, title->len - 2);
+	} else {
+		houdini_unescape_html_f(&buf, title->data, title->len);
+	}
+
+	strbuf_unescape(&buf);
+	return strbuf_detach(&buf);
 }
 
 // Parse an autolink or HTML tag.
 // Assumes the subject has a '<' character at the current position.
-static inl* handle_pointy_brace(subject* subj)
-{
-  int matchlen = 0;
-  bstring contents;
-  inl* result;
-
-  advance(subj);  // advance past first <
-  // first try to match a URL autolink
-  matchlen = scan_autolink_uri(subj->buffer, subj->pos);
-  if (matchlen > 0) {
-    contents = bmidstr(subj->buffer, subj->pos, matchlen - 1);
-    subj->pos += matchlen;
-    result =  make_link(make_str_with_entities(contents),
-                        bstrcpy(contents), bfromcstr(""));
-    bdestroy(contents);
-    return result;
-  }
-  // next try to match an email autolink
-  matchlen = scan_autolink_email(subj->buffer, subj->pos);
-  if (matchlen > 0) {
-    contents = bmidstr(subj->buffer, subj->pos, matchlen - 1);
-    subj->pos += matchlen;
-    result = make_link(make_str_with_entities(contents),
-                       bformat("mailto:%s", contents->data),
-                       bfromcstr(""));
-    bdestroy(contents);
-    return result;
-  }
-  // finally, try to match an html tag
-  matchlen = scan_html_tag(subj->buffer, subj->pos);
-  if (matchlen > 0) {
-    contents = bmidstr(subj->buffer, subj->pos, matchlen);
-    binsertch(contents, 0, 1, '<');
-    subj->pos += matchlen;
-    return make_raw_html(contents);
-  } else {// if nothing matches, just return the opening <:
-    return make_str(bfromcstr("<"));
-  }
+static node_inl* handle_pointy_brace(subject* subj)
+{
+	int matchlen = 0;
+	chunk contents;
+
+	advance(subj);  // advance past first <
+
+	// first try to match a URL autolink
+	matchlen = scan_autolink_uri(&subj->input, subj->pos);
+	if (matchlen > 0) {
+		contents = chunk_dup(&subj->input, subj->pos, matchlen - 1);
+		subj->pos += matchlen;
+
+		return make_autolink(
+			make_str_with_entities(&contents),
+			contents, 0
+		);
+	}
+
+	// next try to match an email autolink
+	matchlen = scan_autolink_email(&subj->input, subj->pos);
+	if (matchlen > 0) {
+		contents = chunk_dup(&subj->input, subj->pos, matchlen - 1);
+		subj->pos += matchlen;
+
+		return make_autolink(
+				make_str_with_entities(&contents),
+				contents, 1
+		);
+	}
+
+	// finally, try to match an html tag
+	matchlen = scan_html_tag(&subj->input, subj->pos);
+	if (matchlen > 0) {
+		contents = chunk_dup(&subj->input, subj->pos - 1, matchlen + 1);
+		subj->pos += matchlen;
+		return make_raw_html(contents);
+	}
+
+	// if nothing matches, just return the opening <:
+	return make_str(chunk_literal("<"));
 }
 
 // Parse a link label.  Returns 1 if successful.
@@ -641,366 +578,384 @@ static inl* handle_pointy_brace(subject* subj)
 // markers. So, 2 below contains a link while 1 does not:
 // 1. [a link `with a ](/url)` character
 // 2. [a link *with emphasized ](/url) text*
-static int link_label(subject* subj, bstring* raw_label)
-{
-  int nestlevel = 0;
-  inl* tmp = NULL;
-  bstring raw;
-  int startpos = subj->pos;
-  if (subj->label_nestlevel) {
-    // if we've already checked to the end of the subject
-    // for a label, even with a different starting [, we
-    // know we won't find one here and we can just return.
-    // Note:  nestlevel 1 would be: [foo [bar]
-    // nestlevel 2 would be: [foo [bar [baz]
-    subj->label_nestlevel--;
-    return 0;
-  }
-  advance(subj);  // advance past [
-  char c;
-  while ((c = peek_char(subj)) && (c != ']' || nestlevel > 0)) {
-    switch (c) {
-    case '`':
-      tmp = handle_backticks(subj);
-      free_inlines(tmp);
-      break;
-    case '<':
-      tmp = handle_pointy_brace(subj);
-      free_inlines(tmp);
-      break;
-    case '[':  // nested []
-      nestlevel++;
-      advance(subj);
-      break;
-    case ']':  // nested []
-      nestlevel--;
-      advance(subj);
-      break;
-    case '\\':
-      advance(subj);
-      if (ispunct(peek_char(subj))) {
-        advance(subj);
-      }
-      break;
-    default:
-      advance(subj);
-    }
-  }
-  if (c == ']') {
-    if (raw_label != NULL) {
-      raw = bmidstr(subj->buffer, startpos + 1, subj->pos - (startpos + 1));
-      *raw_label = raw;
-    }
-    subj->label_nestlevel = 0;
-    advance(subj);  // advance past ]
-    return 1;
-  } else {
-    if (c == 0) {
-      subj->label_nestlevel = nestlevel;
-    }
-    subj->pos = startpos; // rewind
-    return 0;
-  }
+static int link_label(subject* subj, chunk *raw_label)
+{
+	int nestlevel = 0;
+	node_inl* tmp = NULL;
+	int startpos = subj->pos;
+
+	if (subj->label_nestlevel) {
+		// if we've already checked to the end of the subject
+		// for a label, even with a different starting [, we
+		// know we won't find one here and we can just return.
+		// Note:  nestlevel 1 would be: [foo [bar]
+		// nestlevel 2 would be: [foo [bar [baz]
+		subj->label_nestlevel--;
+		return 0;
+	}
+
+	advance(subj);  // advance past [
+	char c;
+	while ((c = peek_char(subj)) && (c != ']' || nestlevel > 0)) {
+		switch (c) {
+			case '`':
+				tmp = handle_backticks(subj);
+				free_inlines(tmp);
+				break;
+			case '<':
+				tmp = handle_pointy_brace(subj);
+				free_inlines(tmp);
+				break;
+			case '[':  // nested []
+				nestlevel++;
+				advance(subj);
+				break;
+			case ']':  // nested []
+				nestlevel--;
+				advance(subj);
+				break;
+			case '\\':
+				advance(subj);
+				if (ispunct(peek_char(subj))) {
+					advance(subj);
+				}
+				break;
+			default:
+				advance(subj);
+		}
+	}
+	if (c == ']') {
+		*raw_label = chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1));
+		subj->label_nestlevel = 0;
+		advance(subj);  // advance past ]
+		return 1;
+	} else {
+		if (c == 0) {
+			subj->label_nestlevel = nestlevel;
+		}
+		subj->pos = startpos; // rewind
+		return 0;
+	}
 }
 
 // Parse a link or the link portion of an image, or return a fallback.
-static inl* handle_left_bracket(subject* subj)
-{
-  inl* lab = NULL;
-  inl* result = NULL;
-  reference* ref;
-  int n;
-  int sps;
-  int found_label;
-  int endlabel, starturl, endurl, starttitle, endtitle, endall;
-  bstring url, title, rawlabel, reflabel;
-  bstring rawlabel2 = NULL;
-  found_label = link_label(subj, &rawlabel);
-  endlabel = subj->pos;
-  if (found_label) {
-    if (peek_char(subj) == '(' &&
-        ((sps = scan_spacechars(subj->buffer, subj->pos + 1)) > -1) &&
-        ((n = scan_link_url(subj->buffer, subj->pos + 1 + sps)) > -1)) {
-      // try to parse an explicit link:
-      starturl = subj->pos + 1 + sps; // after (
-      endurl = starturl + n;
-      starttitle = endurl + scan_spacechars(subj->buffer, endurl);
-      // ensure there are spaces btw url and title
-      endtitle = (starttitle == endurl) ? starttitle :
-                 starttitle + scan_link_title(subj->buffer, starttitle);
-      endall = endtitle + scan_spacechars(subj->buffer, endtitle);
-      if (bchar(subj->buffer, endall) == ')') {
-        subj->pos = endall + 1;
-        url = bmidstr(subj->buffer, starturl, endurl - starturl);
-        clean_url(url);
-        title = bmidstr(subj->buffer, starttitle, endtitle - starttitle);
-        clean_title(title);
-        lab = parse_inlines(rawlabel, NULL);
-        bdestroy(rawlabel);
-        return make_link(lab, url, title);
-      } else {
-        // if we get here, we matched a label but didn't get further:
-        subj->pos = endlabel;
-        lab = parse_inlines(rawlabel, subj->reference_map);
-        bdestroy(rawlabel);
-        result = append_inlines(make_str(bfromcstr("[")),
-                                append_inlines(lab,
-                                               make_str(bfromcstr("]"))));
-        return result;
-      }
-    } else {
-      // Check for reference link.
-      // First, see if there's another label:
-      subj->pos = subj->pos + scan_spacechars(subj->buffer, endlabel);
-      reflabel = rawlabel;
-      // if followed by a nonempty link label, we change reflabel to it:
-      if (peek_char(subj) == '[' &&
-          link_label(subj, &rawlabel2)) {
-        if (blength(rawlabel2) > 0) {
-          reflabel = rawlabel2;
-        }
-      } else {
-        subj->pos = endlabel;
-      }
-      // lookup rawlabel in subject->reference_map:
-      ref = lookup_reference(subj->reference_map, reflabel);
-      if (ref != NULL) { // found
-        lab = parse_inlines(rawlabel, NULL);
-        result = make_link(lab, bstrcpy(ref->url), bstrcpy(ref->title));
-      } else {
-        subj->pos = endlabel;
-        lab = parse_inlines(rawlabel, subj->reference_map);
-        result = append_inlines(make_str(bfromcstr("[")),
-                               append_inlines(lab, make_str(bfromcstr("]"))));
-      }
-      bdestroy(rawlabel);
-      bdestroy(rawlabel2);
-      return result;
-    }
-  }
-  // If we fall through to here, it means we didn't match a link:
-  advance(subj);  // advance past [
-  return make_str(bfromcstr("["));
+static node_inl* handle_left_bracket(subject* subj)
+{
+	node_inl *lab = NULL;
+	node_inl *result = NULL;
+	reference *ref;
+	int n;
+	int sps;
+	int found_label;
+	int endlabel, starturl, endurl, starttitle, endtitle, endall;
+
+	chunk rawlabel;
+	chunk url, title;
+
+	found_label = link_label(subj, &rawlabel);
+	endlabel = subj->pos;
+
+	if (found_label) {
+		if (peek_char(subj) == '(' &&
+				((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) &&
+				((n = scan_link_url(&subj->input, subj->pos + 1 + sps)) > -1)) {
+
+			// try to parse an explicit link:
+			starturl = subj->pos + 1 + sps; // after (
+			endurl = starturl + n;
+			starttitle = endurl + scan_spacechars(&subj->input, endurl);
+
+			// ensure there are spaces btw url and title
+			endtitle = (starttitle == endurl) ? starttitle :
+				starttitle + scan_link_title(&subj->input, starttitle);
+
+			endall = endtitle + scan_spacechars(&subj->input, endtitle);
+
+			if (peek_at(subj, endall) == ')') {
+				subj->pos = endall + 1;
+
+				url = chunk_dup(&subj->input, starturl, endurl - starturl);
+				title = chunk_dup(&subj->input, starttitle, endtitle - starttitle);
+				lab = parse_chunk_inlines(&rawlabel, NULL);
+
+				return make_link(lab, url, title);
+			} else {
+				// if we get here, we matched a label but didn't get further:
+				subj->pos = endlabel;
+				lab = parse_chunk_inlines(&rawlabel, subj->refmap);
+				result = append_inlines(make_str(chunk_literal("[")),
+						append_inlines(lab,
+							make_str(chunk_literal("]"))));
+				return result;
+			}
+		} else {
+			chunk rawlabel_tmp;
+			chunk reflabel;
+
+			// Check for reference link.
+			// First, see if there's another label:
+			subj->pos = subj->pos + scan_spacechars(&subj->input, endlabel);
+			reflabel = rawlabel;
+
+			// if followed by a nonempty link label, we change reflabel to it:
+			if (peek_char(subj) == '[' && link_label(subj, &rawlabel_tmp)) {
+				if (rawlabel_tmp.len > 0)
+					reflabel = rawlabel_tmp;
+			} else {
+				subj->pos = endlabel;
+			}
+
+			// lookup rawlabel in subject->reference_map:
+			ref = reference_lookup(subj->refmap, &reflabel);
+			if (ref != NULL) { // found
+				lab = parse_chunk_inlines(&rawlabel, NULL);
+				result = make_ref_link(lab, ref);
+			} else {
+				subj->pos = endlabel;
+				lab = parse_chunk_inlines(&rawlabel, subj->refmap);
+				result = append_inlines(make_str(chunk_literal("[")),
+						append_inlines(lab, make_str(chunk_literal("]"))));
+			}
+			return result;
+		}
+	}
+	// If we fall through to here, it means we didn't match a link:
+	advance(subj);  // advance past [
+	return make_str(chunk_literal("["));
 }
 
 // Parse a hard or soft linebreak, returning an inline.
 // Assumes the subject has a newline at the current position.
-static inl* handle_newline(subject *subj)
-{
-  int nlpos = subj->pos;
-  // skip over newline
-  advance(subj);
-  // skip spaces at beginning of line
-  while (peek_char(subj) == ' ') {
-    advance(subj);
-  }
-  if (nlpos > 1 &&
-      bchar(subj->buffer, nlpos - 1) == ' ' &&
-      bchar(subj->buffer, nlpos - 2) == ' ') {
-    return make_linebreak();
-  } else {
-    return make_softbreak();
-  }
+static node_inl* handle_newline(subject *subj)
+{
+	int nlpos = subj->pos;
+	// skip over newline
+	advance(subj);
+	// skip spaces at beginning of line
+	while (peek_char(subj) == ' ') {
+		advance(subj);
+	}
+	if (nlpos > 1 &&
+			peek_at(subj, nlpos - 1) == ' ' &&
+			peek_at(subj, nlpos - 2) == ' ') {
+		return make_linebreak();
+	} else {
+		return make_softbreak();
+	}
 }
 
 inline static int not_eof(subject* subj)
 {
-  return !is_eof(subj);
+	return !is_eof(subj);
 }
 
 // Parse inlines while a predicate is satisfied.  Return inlines.
-extern inl* parse_inlines_while(subject* subj, int (*f)(subject*))
+extern node_inl* parse_inlines_while(subject* subj, int (*f)(subject*))
 {
-  inl* result = NULL;
-  inl** last = &result;
-  while ((*f)(subj) && parse_inline(subj, last)) {
-  }
-  return result;
+	node_inl* result = NULL;
+	node_inl** last = &result;
+	while ((*f)(subj) && parse_inline(subj, last)) {
+	}
+	return result;
+}
+
+node_inl *parse_chunk_inlines(chunk *chunk, reference_map *refmap)
+{
+	subject subj;
+	subject_from_chunk(&subj, chunk, refmap);
+	return parse_inlines_while(&subj, not_eof);
+}
+
+static int subject_find_special_char(subject *subj)
+{
+	// "\n\\`&_*[]<!"
+	static const int8_t SPECIAL_CHARS[256] = {
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,
+		1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+
+	int n = subj->pos + 1;
+
+	while (n < subj->input.len) {
+		if (SPECIAL_CHARS[subj->input.data[n]])
+			return n;
+		n++;
+	}
+
+	return subj->input.len;
 }
 
 // Parse an inline, advancing subject, and add it to last element.
 // Adjust tail to point to new last element of list.
 // Return 0 if no inline can be parsed, 1 otherwise.
-extern int parse_inline(subject* subj, inl ** last)
-{
-  inl* new = NULL;
-  bstring contents;
-  bstring special_chars;
-  unsigned char c;
-  int endpos;
-  c = peek_char(subj);
-  if (c == 0) {
-    return 0;
-  }
-  switch(c){
-    case '\n':
-      new = handle_newline(subj);
-      break;
-    case '`':
-      new = handle_backticks(subj);
-      break;
-    case '\\':
-      new = handle_backslash(subj);
-      break;
-    case '&':
-      new = handle_entity(subj);
-      break;
-    case '<':
-      new = handle_pointy_brace(subj);
-      break;
-    case '_':
-      if (subj->pos > 0 && (isalnum(bchar(subj->buffer, subj->pos - 1)) ||
-                            bchar(subj->buffer, subj->pos - 1) == '_')) {
-        new = make_str(take_one(subj));
-      } else {
-        new = handle_strong_emph(subj, '_');
-      }
-      break;
-    case '*':
-      new = handle_strong_emph(subj, '*');
-      break;
-    case '[':
-      new = handle_left_bracket(subj);
-      break;
-    case '!':
-      advance(subj);
-      if (peek_char(subj) == '[') {
-        new = handle_left_bracket(subj);
-        if (new != NULL && new->tag == link) {
-          new->tag = image;
-        } else {
-          new = append_inlines(make_str(bfromcstr("!")), new);
-        }
-      } else {
-        new = make_str(bfromcstr("!"));
-      }
-      break;
-    default:
-      // we read until we hit a special character
-      special_chars = bfromcstr("\n\\`&_*[]<!");
-      endpos = binchr(subj->buffer, subj->pos, special_chars);
-      bdestroy(special_chars);
-      if (endpos == subj->pos) {
-        // current char is special: read a 1-character str
-        contents = take_one(subj);
-      } else if (endpos == BSTR_ERR) {
-        // special char not found, take whole rest of buffer:
-        endpos = subj->buffer->slen;
-        contents = bmidstr(subj->buffer, subj->pos, endpos - subj->pos);
-        subj->pos = endpos;
-      } else {
-        // take buffer from subj->pos to endpos to str.
-        contents = bmidstr(subj->buffer, subj->pos, endpos - subj->pos);
-        subj->pos = endpos;
-        // if we're at a newline, strip trailing spaces.
-        if (peek_char(subj) == '\n') {
-          brtrimws(contents);
-        }
-      }
-      new = make_str(contents);
-  }
-  if (*last == NULL) {
-    *last = new;
-  } else {
-    append_inlines(*last, new);
-  }
-  return 1;
-}
-
-extern inl* parse_inlines(bstring input, reference** refmap)
-{
-  subject * subj = make_subject(input, refmap);
-  inl * result = parse_inlines_while(subj, not_eof);
-  free(subj);
-  return result;
+static int parse_inline(subject* subj, node_inl ** last)
+{
+	node_inl* new = NULL;
+	chunk contents;
+	unsigned char c;
+	int endpos;
+	c = peek_char(subj);
+	if (c == 0) {
+		return 0;
+	}
+	switch(c){
+		case '\n':
+			new = handle_newline(subj);
+			break;
+		case '`':
+			new = handle_backticks(subj);
+			break;
+		case '\\':
+			new = handle_backslash(subj);
+			break;
+		case '&':
+			new = handle_entity(subj);
+			break;
+		case '<':
+			new = handle_pointy_brace(subj);
+			break;
+		case '_':
+			if (subj->pos > 0) {
+				unsigned char prev = peek_at(subj, subj->pos - 1);
+				if (isalnum(prev) || prev == '_') {
+					new = make_str(chunk_literal("_"));
+					advance(subj);
+					break;
+				}
+			}
+
+			new = handle_strong_emph(subj, '_');
+			break;
+		case '*':
+			new = handle_strong_emph(subj, '*');
+			break;
+		case '[':
+			new = handle_left_bracket(subj);
+			break;
+		case '!':
+			advance(subj);
+			if (peek_char(subj) == '[') {
+				new = handle_left_bracket(subj);
+				if (new != NULL && new->tag == INL_LINK) {
+					new->tag = INL_IMAGE;
+				} else {
+					new = append_inlines(make_str(chunk_literal("!")), new);
+				}
+			} else {
+				new = make_str(chunk_literal("!"));
+			}
+			break;
+		default:
+			endpos = subject_find_special_char(subj);
+			contents = chunk_dup(&subj->input, subj->pos, endpos - subj->pos);
+			subj->pos = endpos;
+
+			// if we're at a newline, strip trailing spaces.
+			if (peek_char(subj) == '\n') {
+				chunk_rtrim(&contents);
+			}
+
+			new = make_str(contents);
+	}
+	if (*last == NULL) {
+		*last = new;
+	} else {
+		append_inlines(*last, new);
+	}
+	return 1;
+}
+
+extern node_inl* parse_inlines(strbuf *input, reference_map *refmap)
+{
+	subject subj;
+	subject_from_buf(&subj, input, refmap);
+	return parse_inlines_while(&subj, not_eof);
 }
 
 // Parse zero or more space characters, including at most one newline.
 void spnl(subject* subj)
 {
-  bool seen_newline = false;
-  while (peek_char(subj) == ' ' ||
-         (!seen_newline &&
-          (seen_newline = peek_char(subj) == '\n'))) {
-    advance(subj);
-  }
+	bool seen_newline = false;
+	while (peek_char(subj) == ' ' ||
+			(!seen_newline &&
+			 (seen_newline = peek_char(subj) == '\n'))) {
+		advance(subj);
+	}
 }
 
 // Parse reference.  Assumes string begins with '[' character.
 // Modify refmap if a reference is encountered.
 // Return 0 if no reference found, otherwise position of subject
 // after reference is parsed.
-extern int parse_reference(bstring input, reference** refmap)
-{
-  subject * subj = make_subject(input, NULL);
-  bstring lab = NULL;
-  bstring url = NULL;
-  bstring title = NULL;
-  int matchlen = 0;
-  int beforetitle;
-  reference * new = NULL;
-  int newpos;
-
-  // parse label:
-  if (!link_label(subj, &lab)) {
-    free(subj);
-    return 0;
-  }
-  // colon:
-  if (peek_char(subj) == ':') {
-    advance(subj);
-  } else {
-    free(subj);
-    bdestroy(lab);
-    return 0;
-  }
-  // parse link url:
-  spnl(subj);
-  matchlen = scan_link_url(subj->buffer, subj->pos);
-  if (matchlen) {
-    url = bmidstr(subj->buffer, subj->pos, matchlen);
-    clean_url(url);
-    subj->pos += matchlen;
-  } else {
-    free(subj);
-    bdestroy(lab);
-    bdestroy(url);
-    return 0;
-  }
-  // parse optional link_title
-  beforetitle = subj->pos;
-  spnl(subj);
-  matchlen = scan_link_title(subj->buffer, subj->pos);
-  if (matchlen) {
-    title = bmidstr(subj->buffer, subj->pos, matchlen);
-    clean_title(title);
-    subj->pos += matchlen;
-  } else {
-    subj->pos = beforetitle;
-    title = bfromcstr("");
-  }
-  // parse final spaces and newline:
-  while (peek_char(subj) == ' ') {
-    advance(subj);
-  }
-  if (peek_char(subj) == '\n') {
-    advance(subj);
-  } else if (peek_char(subj) != 0) {
-    free(subj);
-    bdestroy(lab);
-    bdestroy(url);
-    bdestroy(title);
-    return 0;
-  }
-  // insert reference into refmap
-  new = make_reference(lab, url, title);
-  add_reference(refmap, new);
-
-  newpos = subj->pos;
-  free(subj);
-  bdestroy(lab);
-  bdestroy(url);
-  bdestroy(title);
-  return newpos;
+int parse_reference_inline(strbuf *input, reference_map *refmap)
+{
+	subject subj;
+
+	chunk lab;
+	chunk url;
+	chunk title;
+
+	int matchlen = 0;
+	int beforetitle;
+
+	subject_from_buf(&subj, input, NULL);
+
+	// parse label:
+	if (!link_label(&subj, &lab))
+		return 0;
+
+	// colon:
+	if (peek_char(&subj) == ':') {
+		advance(&subj);
+	} else {
+		return 0;
+	}
+
+	// parse link url:
+	spnl(&subj);
+	matchlen = scan_link_url(&subj.input, subj.pos);
+	if (matchlen) {
+		url = chunk_dup(&subj.input, subj.pos, matchlen);
+		subj.pos += matchlen;
+	} else {
+		return 0;
+	}
+
+	// parse optional link_title
+	beforetitle = subj.pos;
+	spnl(&subj);
+	matchlen = scan_link_title(&subj.input, subj.pos);
+	if (matchlen) {
+		title = chunk_dup(&subj.input, subj.pos, matchlen);
+		subj.pos += matchlen;
+	} else {
+		subj.pos = beforetitle;
+		title = chunk_literal("");
+	}
+	// parse final spaces and newline:
+	while (peek_char(&subj) == ' ') {
+		advance(&subj);
+	}
+	if (peek_char(&subj) == '\n') {
+		advance(&subj);
+	} else if (peek_char(&subj) != 0) {
+		return 0;
+	}
+	// insert reference into refmap
+	reference_create(refmap, &lab, &url, &title);
+	return subj.pos;
 }
author	Kārlis Gaņģis <Knagis@users.noreply.github.com>	2014-10-08 11:02:43 +0300
committer	Kārlis Gaņģis <Knagis@users.noreply.github.com>	2014-10-08 11:02:43 +0300
commit	43b8646d444d6828014bdd38df956f5871d60efe (patch)
tree	66a1b9f6bd38822ee0c01a097ba7465157a5480c /src/inlines.c
parent	3567b844ca1fd7bec62801f8758545f7bd5cbaae (diff)
parent	daeb55edc7636deebc2a79621ea06c3548d67827 (diff)