From e216094e2192c05ddbd0988458eb8c0012e7baf8 Mon Sep 17 00:00:00 2001
From: Vicent Marti <tanoku@gmail.com>
Date: Tue, 2 Sep 2014 01:10:54 +0200
Subject: Replace bstrlib strings with gh_buf/chunk in the inline parser

---
 src/inlines.c | 1711 ++++++++++++++++++++++++++++++---------------------------
 1 file changed, 908 insertions(+), 803 deletions(-)

(limited to 'src/inlines.c')

diff --git a/src/inlines.c b/src/inlines.c
index f75c846..4ff45ad 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -2,133 +2,154 @@
 #include <stdio.h>
 #include <stdbool.h>
 #include <ctype.h>
-#include "bstrlib.h"
+#include <string.h>
+
 #include "stmd.h"
 #include "uthash.h"
 #include "debug.h"
 #include "scanners.h"
 #include "utf8.h"
 
+typedef struct Subject {
+  const gh_buf   *buffer;
+  int            pos;
+  reference**    reference_map;
+  int            label_nestlevel;
+} subject;
+
+reference* lookup_reference(reference** refmap, chunk *label);
+reference* make_reference(chunk *label, chunk *url, chunk *title);
+
+static unsigned char *clean_url(chunk *url);
+static unsigned char *clean_title(chunk *title);
+
+inline static unsigned char *chunk_to_cstr(chunk *c);
+inline static void chunk_free(chunk *c);
+inline static void chunk_trim(chunk *c);
+
+inline static chunk chunk_literal(const char *data);
+inline static chunk chunk_buf_detach(gh_buf *buf);
+inline static chunk chunk_buf(const gh_buf *buf, int pos, int len);
+
+static inl *parse_chunk_inlines(chunk *chunk, reference** refmap);
+static inl *parse_inlines_while(subject* subj, int (*f)(subject*));
+static int parse_inline(subject* subj, inl ** last);
+
 extern void free_reference(reference *ref) {
-  bdestroy(ref->label);
-  bdestroy(ref->url);
-  bdestroy(ref->title);
-  free(ref);
+	free(ref->label);
+	free(ref->url);
+	free(ref->title);
+	free(ref);
 }
 
 extern void free_reference_map(reference **refmap) {
-  /* free the hash table contents */
-  reference *s;
-  reference *tmp;
-  if (refmap != NULL) {
-    HASH_ITER(hh, *refmap, s, tmp) {
-      HASH_DEL(*refmap, s);
-      free_reference(s);
-    }
-    free(refmap);
-  }
+	/* free the hash table contents */
+	reference *s;
+	reference *tmp;
+	if (refmap != NULL) {
+		HASH_ITER(hh, *refmap, s, tmp) {
+			HASH_DEL(*refmap, s);
+			free_reference(s);
+		}
+		free(refmap);
+	}
 }
 
 // normalize reference:  collapse internal whitespace to single space,
 // remove leading/trailing whitespace, case fold
-static bstring normalize_reference(bstring s)
-{
-  bstring normalized = case_fold(s);
-  int pos = 0;
-  int startpos;
-  char c;
-  while ((c = bchar(normalized, pos))) {
-    if (isspace(c)) {
-      startpos = pos;
-      // skip til next non-space
-      pos++;
-      while (isspace(bchar(s, pos))) {
-        pos++;
-      }
-      bdelete(normalized, startpos, pos - startpos);
-      binsertch(normalized, startpos, 1, ' ');
-      pos = startpos + 1;
-    }
-    pos++;
-  }
-  btrimws(normalized);
-  return normalized;
+static unsigned char *normalize_reference(chunk *ref)
+{
+	gh_buf normalized = GH_BUF_INIT;
+	int r, w;
+
+	utf8proc_case_fold(&normalized, ref->data, ref->len);
+	gh_buf_trim(&normalized);
+
+	// compact in place: every whitespace run becomes a single ' '
+	for (r = 0, w = 0; r < normalized.size; ++r) {
+		int space = isspace((unsigned char)normalized.ptr[r]);
+		if (space && w && normalized.ptr[w - 1] == ' ')
+			continue;
+		normalized.ptr[w++] = space ? ' ' : normalized.ptr[r];
+	}
+
+	gh_buf_truncate(&normalized, w);  // drop the stale tail left behind by compaction
+	return gh_buf_detach(&normalized);
 }
 
 // Returns reference if refmap contains a reference with matching
 // label, otherwise NULL.
-extern reference* lookup_reference(reference** refmap, bstring lab)
+extern reference* lookup_reference(reference** refmap, chunk *label)
 {
-  reference * ref = NULL;
-  bstring label = normalize_reference(lab);
-  if (refmap != NULL) {
-    HASH_FIND_STR(*refmap, (char*) label->data, ref);
-  }
-  bdestroy(label);
-  return ref;
+	reference *ref = NULL;
+	unsigned char *norm = normalize_reference(label);
+	if (refmap != NULL) {
+		HASH_FIND_STR(*refmap, (char*)norm, ref);
+	}
+	free(norm);  // release the temporary normalized key, never the caller's chunk
+	return ref;
 }
 
-extern reference* make_reference(bstring label, bstring url, bstring title)
+extern reference* make_reference(chunk *label, chunk *url, chunk *title)
 {
-  reference * ref;
-  ref = malloc(sizeof(reference));
-  ref->label = normalize_reference(label);
-  ref->url = bstrcpy(url);
-  ref->title = bstrcpy(title);
-  return ref;
+	reference *ref;
+	ref = malloc(sizeof(reference));
+	ref->label = normalize_reference(label);
+	ref->url = clean_url(url);
+	ref->title = clean_title(title);
+	return ref;
 }
 
 extern void add_reference(reference** refmap, reference* ref)
 {
-  reference * t = NULL;
-  HASH_FIND(hh, *refmap, (char*) ref->label->data,
-            (unsigned) blength(ref->label), t);
-  if (t == NULL) {
-    HASH_ADD_KEYPTR(hh, *refmap, (char*) ref->label->data,
-                    (unsigned) blength(ref->label), ref);
-  } else {
-    free_reference(ref);  // we free this now since it won't be in the refmap
-  }
+	reference * t = NULL;
+	HASH_FIND(hh, *refmap, (char*)ref->label, (unsigned)strlen(ref->label), t);
+
+	if (t == NULL) {
+		HASH_ADD_KEYPTR(hh, *refmap, (char*)ref->label, (unsigned)strlen(ref->label), ref);
+	} else {
+		free_reference(ref);  // we free this now since it won't be in the refmap
+	}
 }
 
 // Create an inline with a linkable string value.
-inline static inl* make_linkable(int t, inl* label, bstring url, bstring title)
+inline static inl* make_linkable(int t, inl* label, chunk url, chunk title)
 {
-  inl* e = (inl*) malloc(sizeof(inl));
-  e->tag = t;
-  e->content.linkable.label = label;
-  e->content.linkable.url   = url;
-  e->content.linkable.title = title;
-  e->next = NULL;
-  return e;
+	inl* e = (inl*) malloc(sizeof(inl));
+	e->tag = t;
+	e->content.linkable.label = label;
+	e->content.linkable.url   = chunk_to_cstr(&url);
+	e->content.linkable.title = chunk_to_cstr(&title);
+	e->next = NULL;
+	return e;
 }
 
 inline static inl* make_inlines(int t, inl* contents)
 {
-  inl* e = (inl*) malloc(sizeof(inl));
-  e->tag = t;
-  e->content.inlines = contents;
-  e->next = NULL;
-  return e;
+	inl* e = (inl*) malloc(sizeof(inl));
+	e->tag = t;
+	e->content.inlines = contents;
+	e->next = NULL;
+	return e;
 }
 
 // Create an inline with a literal string value.
-inline static inl* make_literal(int t, bstring s)
+inline static inl* make_literal(int t, chunk s)
 {
-  inl* e = (inl*) malloc(sizeof(inl));
-  e->tag = t;
-  e->content.literal = s;
-  e->next = NULL;
-  return e;
+	inl* e = (inl*) malloc(sizeof(inl));
+	e->tag = t;
+	e->content.literal = s;
+	e->next = NULL;
+	return e;
 }
 
 // Create an inline with no value.
 inline static inl* make_simple(int t)
 {
-  inl* e = (inl*) malloc(sizeof(inl));
-  e->tag = t;
-  e->next = NULL;
-  return e;
+	inl* e = (inl*) malloc(sizeof(inl));
+	e->tag = t;
+	e->next = NULL;
+	return e;
 }
 
 // Macros for creating various kinds of inlines.
@@ -139,113 +160,157 @@ inline static inl* make_simple(int t)
 #define make_linebreak() make_simple(linebreak)
 #define make_softbreak() make_simple(softbreak)
 #define make_link(label, url, title) make_linkable(link, label, url, title)
-#define make_image(alt, url, title) make_linkable(image, alt, url, title)
 #define make_emph(contents) make_inlines(emph, contents)
 #define make_strong(contents) make_inlines(strong, contents)
 
 // Free an inline list.
 extern void free_inlines(inl* e)
 {
-  inl * next;
-  while (e != NULL) {
-    switch (e->tag){
-    case str:
-    case raw_html:
-    case code:
-    case entity:
-      bdestroy(e->content.literal);
-      break;
-    case linebreak:
-    case softbreak:
-      break;
-    case link:
-    case image:
-      bdestroy(e->content.linkable.url);
-      bdestroy(e->content.linkable.title);
-      free_inlines(e->content.linkable.label);
-      break;
-    case emph:
-    case strong:
-      free_inlines(e->content.inlines);
-      break;
-    default:
-      break;
-    }
-    next = e->next;
-    free(e);
-    e = next;
-  }
+	inl * next;
+	while (e != NULL) {
+		switch (e->tag){
+			case str:
+			case raw_html:
+			case code:
+			case entity:
+				chunk_free(&e->content.literal);
+				break;
+			case linebreak:
+			case softbreak:
+				break;
+			case link:
+			case image:
+				free(e->content.linkable.url);
+				free(e->content.linkable.title);
+				free_inlines(e->content.linkable.label);
+				break;
+			case emph:
+			case strong:
+				free_inlines(e->content.inlines);
+				break;
+			default:
+				break;
+		}
+		next = e->next;
+		free(e);
+		e = next;
+	}
 }
 
 // Append inline list b to the end of inline list a.
 // Return pointer to head of new list.
 inline static inl* append_inlines(inl* a, inl* b)
 {
-  if (a == NULL) {  // NULL acts like an empty list
-    return b;
-  }
-  inl* cur = a;
-  while (cur->next) {
-    cur = cur->next;
-  }
-  cur->next = b;
-  return a;
+	if (a == NULL) {  // NULL acts like an empty list
+		return b;
+	}
+	inl* cur = a;
+	while (cur->next) {
+		cur = cur->next;
+	}
+	cur->next = b;
+	return a;
 }
 
 // Make a 'subject' from an input string.
-static subject* make_subject(bstring s, reference** refmap)
+static void init_subject(subject *e, gh_buf *buffer, int input_pos, reference** refmap)
 {
-  subject* e = (subject*) malloc(sizeof(subject));
-  // remove final whitespace
-  brtrimws(s);
-  e->buffer = s;
-  e->pos = 0;
-  e->label_nestlevel = 0;
-  e->reference_map = refmap;
-  return e;
+	e->buffer = buffer;
+	e->pos = input_pos;
+	e->label_nestlevel = 0;
+	e->reference_map = refmap;
 }
 
 inline static int isbacktick(int c)
 {
-  return (c == '`');
+	return (c == '`');
+}
+
+inline static void chunk_free(chunk *c)
+{
+	if (c->alloc)
+		free((char *)c->data);
+
+	c->data = NULL;
+	c->alloc = 0;
+	c->len = 0;
+}
+
+inline static void chunk_trim(chunk *c)
+{
+	while (c->len && isspace(c->data[0])) {
+		c->data++;
+		c->len--;
+	}
+
+	while (c->len > 0) {
+		if (!isspace(c->data[c->len - 1]))
+			break;
+
+		c->len--;
+	}
+}
+
+inline static unsigned char *chunk_to_cstr(chunk *c)
+{
+	unsigned char *str;
+
+	str = malloc(c->len + 1);
+	memcpy(str, c->data, c->len);
+	str[c->len] = 0;
+
+	return str;
+}
+
+inline static chunk chunk_literal(const char *data)
+{
+	chunk c = {data, strlen(data), 0};
+	return c;
+}
+
+inline static chunk chunk_buf(const gh_buf *buf, int pos, int len)
+{
+	chunk c = {buf->ptr + pos, len, 0};
+	return c;
+}
+
+inline static chunk chunk_buf_detach(gh_buf *buf)
+{
+	chunk c;
+
+	c.len = buf->size;
+	c.data = gh_buf_detach(buf);
+	c.alloc = 1;
+
+	return c;
 }
 
 // Return the next character in the subject, without advancing.
 // Return 0 if at the end of the subject.
-#define peek_char(subj) bchar(subj->buffer, subj->pos)
+#define peek_char(subj) gh_buf_at((subj)->buffer, (subj)->pos)
 
 // Return true if there are more characters in the subject.
 inline static int is_eof(subject* subj)
 {
-  return (subj->pos >= blength(subj->buffer));
+	return (subj->pos >= gh_buf_len(subj->buffer));
 }
 
 // Advance the subject.  Doesn't check for eof.
-#define advance(subj) subj->pos += 1
+#define advance(subj) (subj)->pos += 1
 
 // Take characters while a predicate holds, and return a string.
-inline static bstring take_while(subject* subj, int (*f)(int))
+inline static chunk take_while(subject* subj, int (*f)(int))
 {
-  unsigned char c;
-  int startpos = subj->pos;
-  int len = 0;
-  while ((c = peek_char(subj)) && (*f)(c)) {
-    advance(subj);
-    len++;
-  }
-  return bmidstr(subj->buffer, startpos, len);
-}
+	unsigned char c;
+	int startpos = subj->pos;
+	int len = 0;
 
-// Take one character and return a string, or NULL if eof.
-inline static bstring take_one(subject* subj)
-{
-  int startpos = subj->pos;
-  if (is_eof(subj)){
-    return NULL;
-  } else {
-    advance(subj);
-    return bmidstr(subj->buffer, startpos, 1);
-  }
+	while ((c = peek_char(subj)) && (*f)(c)) {
+		advance(subj);
+		len++;
+	}
+
+	return chunk_buf(subj->buffer, startpos, len);
 }
 
 // Try to process a backtick code span that began with a
@@ -255,381 +320,406 @@ inline static bstring take_one(subject* subj)
 // after the closing backticks.
 static int scan_to_closing_backticks(subject* subj, int openticklength)
 {
-  // read non backticks
-  char c;
-  while ((c = peek_char(subj)) && c != '`') {
-    advance(subj);
-  }
-  if (is_eof(subj)) {
-    return 0;  // did not find closing ticks, return 0
-  }
-  int numticks = 0;
-  while (peek_char(subj) == '`') {
-      advance(subj);
-      numticks++;
-  }
-  if (numticks != openticklength){
-    return(scan_to_closing_backticks(subj, openticklength));
-  }
-  return (subj->pos);
-}
-
-// Destructively modify bstring, collapsing consecutive
+	// read non backticks
+	char c;
+	while ((c = peek_char(subj)) && c != '`') {
+		advance(subj);
+	}
+	if (is_eof(subj)) {
+		return 0;  // did not find closing ticks, return 0
+	}
+	int numticks = 0;
+	while (peek_char(subj) == '`') {
+		advance(subj);
+		numticks++;
+	}
+	if (numticks != openticklength){
+		return(scan_to_closing_backticks(subj, openticklength));
+	}
+	return (subj->pos);
+}
+
+// Destructively modify string, collapsing consecutive
 // space and newline characters into a single space.
-static int normalize_whitespace(bstring s)
-{
-  bool last_char_was_space = false;
-  int pos = 0;
-  char c;
-  while ((c = bchar(s, pos))) {
-    switch (c) {
-    case ' ':
-      if (last_char_was_space) {
-        bdelete(s, pos, 1);
-      } else {
-        pos++;
-      }
-      last_char_was_space = true;
-      break;
-    case '\n':
-      if (last_char_was_space) {
-        bdelete(s, pos, 1);
-      } else {
-        bdelete(s, pos, 1);
-        binsertch(s, pos, 1, ' ');
-        pos++;
-      }
-      last_char_was_space = true;
-      break;
-    default:
-      pos++;
-      last_char_was_space = false;
-    }
-  }
-  return 0;
+static void normalize_whitespace(gh_buf *s)
+{
+	// s: buffer rewritten in place.  Only ' ' and '\n' are collapsed;
+	// other whitespace bytes are copied through unchanged.
+	//
+	// One pass with two cursors: `r` visits every byte, `w` marks where
+	// the next kept byte goes.  w never overtakes r, so no scratch
+	// buffer is needed.
+
+	bool last_char_was_space = false;
+	int r, w;
+
+	for (r = 0, w = 0; r < s->size; ++r) {
+		switch (s->ptr[r]) {
+			case ' ':
+			case '\n':
+				// the first byte of a run is emitted as a single
+				// ' '; the remaining bytes of the run are dropped
+				if (!last_char_was_space) {
+					s->ptr[w++] = ' ';
+				}
+				last_char_was_space = true;
+				break;
+
+			default:
+				s->ptr[w++] = s->ptr[r];
+				last_char_was_space = false;
+				break;
+		}
+	}
+
+	// discard the stale bytes left behind the write cursor
+	gh_buf_truncate(s, w);
 }
 
 // Parse backtick code section or raw backticks, return an inline.
 // Assumes that the subject has a backtick at the current position.
 static inl* handle_backticks(subject *subj)
 {
-  bstring openticks = take_while(subj, isbacktick);
-  bstring result;
-  int ticklength = blength(openticks);
-  int startpos = subj->pos;
-  int endpos = scan_to_closing_backticks(subj, ticklength);
-  if (endpos == 0) { // not found
-    subj->pos = startpos; // rewind
-    return make_str(openticks);
-  } else {
-    bdestroy(openticks);
-    result = bmidstr(subj->buffer, startpos, endpos - startpos - ticklength);
-    btrimws(result);
-    normalize_whitespace(result);
-    return make_code(result);
-  }
+	chunk openticks = take_while(subj, isbacktick);
+	int startpos = subj->pos;
+	int endpos = scan_to_closing_backticks(subj, openticks.len);
+
+	if (endpos == 0) { // not found
+		subj->pos = startpos; // rewind
+		return make_str(openticks);
+	} else {
+		gh_buf buf = GH_BUF_INIT;
+
+		gh_buf_set(&buf, subj->buffer->ptr + startpos, endpos - startpos - openticks.len);
+		gh_buf_trim(&buf);
+		normalize_whitespace(&buf);
+
+		return make_code(chunk_buf_detach(&buf));
+	}
 }
 
 // Scan ***, **, or * and return number scanned, or 0.
 // Don't advance position.
 static int scan_delims(subject* subj, char c, bool * can_open, bool * can_close)
 {
-  int numdelims = 0;
-  char char_before, char_after;
-  int startpos = subj->pos;
-
-  char_before = subj->pos == 0 ? '\n' : bchar(subj->buffer, subj->pos - 1);
-  while (peek_char(subj) == c) {
-    numdelims++;
-    advance(subj);
-  }
-  char_after = peek_char(subj);
-  *can_open = numdelims > 0 && numdelims <= 3 && !isspace(char_after);
-  *can_close = numdelims > 0 && numdelims <= 3 && !isspace(char_before);
-  if (c == '_') {
-    *can_open = *can_open && !isalnum(char_before);
-    *can_close = *can_close && !isalnum(char_after);
-  }
-  subj->pos = startpos;
-  return numdelims;
+	int numdelims = 0;
+	unsigned char char_before, char_after;  // unsigned: <ctype.h> calls below need values >= 0
+	int startpos = subj->pos;
+
+	char_before = subj->pos == 0 ? '\n' : gh_buf_at(subj->buffer, subj->pos - 1);
+	while (peek_char(subj) == c) {
+		numdelims++;
+		advance(subj);
+	}
+	char_after = peek_char(subj);
+	*can_open = numdelims > 0 && numdelims <= 3 && !isspace(char_after);
+	*can_close = numdelims > 0 && numdelims <= 3 && !isspace(char_before);
+	if (c == '_') {
+		*can_open = *can_open && !isalnum(char_before);
+		*can_close = *can_close && !isalnum(char_after);
+	}
+	subj->pos = startpos;
+	return numdelims;
 }
 
 // Parse strong/emph or a fallback.
 // Assumes the subject has '_' or '*' at the current position.
 static inl* handle_strong_emph(subject* subj, char c)
 {
-  bool can_open, can_close;
-  inl * result = NULL;
-  inl ** last = malloc(sizeof(inl *));
-  inl * new;
-  inl * il;
-  inl * first_head = NULL;
-  inl * first_close = NULL;
-  int first_close_delims = 0;
-  int numdelims;
-
-  *last = NULL;
-
-  numdelims = scan_delims(subj, c, &can_open, &can_close);
-  subj->pos += numdelims;
-
-  new = make_str(bmidstr(subj->buffer, subj->pos - numdelims, numdelims));
-  *last = new;
-  first_head = new;
-  result = new;
-
-  if (!can_open || numdelims == 0) {
-    goto done;
-  }
-
-  switch (numdelims) {
-  case 1:
-    while (true) {
-      numdelims = scan_delims(subj, c, &can_open, &can_close);
-      if (numdelims >= 1 && can_close) {
-        subj->pos += 1;
-        first_head->tag = emph;
-        bdestroy(first_head->content.literal);
-        first_head->content.inlines = first_head->next;
-        first_head->next = NULL;
-        goto done;
-      } else {
-        if (!parse_inline(subj, last)) {
-          goto done;
-        }
-      }
-    }
-    break;
-  case 2:
-    while (true) {
-      numdelims = scan_delims(subj, c, &can_open, &can_close);
-      if (numdelims >= 2 && can_close) {
-        subj->pos += 2;
-        first_head->tag = strong;
-        bdestroy(first_head->content.literal);
-        first_head->content.inlines = first_head->next;
-        first_head->next = NULL;
-        goto done;
-      } else {
-        if (!parse_inline(subj, last)) {
-          goto done;
-        }
-      }
-    }
-    break;
-  case 3:
-    while (true) {
-      numdelims = scan_delims(subj, c, &can_open, &can_close);
-      if (can_close && numdelims >= 1 && numdelims <= 3 &&
-          numdelims != first_close_delims) {
-        new = make_str(bmidstr(subj->buffer, subj->pos, numdelims));
-        append_inlines(*last, new);
-        *last = new;
-
-        if (first_close_delims == 1 && numdelims > 2) {
-          numdelims = 2;
-        } else if (first_close_delims == 2) {
-          numdelims = 1;
-        } else if (numdelims == 3) {
-          // If we opened with ***, we interpret it as ** followed by *
-          // giving us <strong><em>
-          numdelims = 1;
-        }
-
-        subj->pos += numdelims;
-        if (first_close) {
-          first_head->tag = first_close_delims == 1 ? strong : emph;
-          bdestroy(first_head->content.literal);
-          first_head->content.inlines =
-            make_inlines(first_close_delims == 1 ? emph : strong,
-                         first_head->next);
-
-          il = first_head->next;
-          while (il->next && il->next != first_close) {
-            il = il->next;
-          }
-          il->next = NULL;
-
-          first_head->content.inlines->next = first_close->next;
-
-          il = first_head->content.inlines;
-          while (il->next && il->next != *last) {
-            il = il->next;
-          }
-          il->next = NULL;
-          free_inlines(*last);
-
-          first_close->next = NULL;
-          free_inlines(first_close);
-          first_head->next = NULL;
-          goto done;
-        } else {
-          first_close = *last;
-          first_close_delims = numdelims;
-        }
-      } else {
-        if (!parse_inline(subj, last)) {
-          goto done;
-        }
-      }
-    }
-    break;
-  default:
-    goto done;
-  }
-
- done:
-  free(last);
-  return result;
+	bool can_open, can_close;
+	inl * result = NULL;
+	inl ** last = malloc(sizeof(inl *));
+	inl * new;
+	inl * il;
+	inl * first_head = NULL;
+	inl * first_close = NULL;
+	int first_close_delims = 0;
+	int numdelims;
+
+	*last = NULL;
+
+	numdelims = scan_delims(subj, c, &can_open, &can_close);
+	subj->pos += numdelims;
+
+	new = make_str(chunk_buf(subj->buffer, subj->pos - numdelims, numdelims));
+	*last = new;
+	first_head = new;
+	result = new;
+
+	if (!can_open || numdelims == 0) {
+		goto done;
+	}
+
+	switch (numdelims) {
+		case 1:
+			while (true) {
+				numdelims = scan_delims(subj, c, &can_open, &can_close);
+				if (numdelims >= 1 && can_close) {
+					subj->pos += 1;
+					first_head->tag = emph;
+					chunk_free(&first_head->content.literal);
+					first_head->content.inlines = first_head->next;
+					first_head->next = NULL;
+					goto done;
+				} else {
+					if (!parse_inline(subj, last)) {
+						goto done;
+					}
+				}
+			}
+			break;
+		case 2:
+			while (true) {
+				numdelims = scan_delims(subj, c, &can_open, &can_close);
+				if (numdelims >= 2 && can_close) {
+					subj->pos += 2;
+					first_head->tag = strong;
+					chunk_free(&first_head->content.literal);
+					first_head->content.inlines = first_head->next;
+					first_head->next = NULL;
+					goto done;
+				} else {
+					if (!parse_inline(subj, last)) {
+						goto done;
+					}
+				}
+			}
+			break;
+		case 3:
+			while (true) {
+				numdelims = scan_delims(subj, c, &can_open, &can_close);
+				if (can_close && numdelims >= 1 && numdelims <= 3 &&
+						numdelims != first_close_delims) {
+					new = make_str(chunk_buf(subj->buffer, subj->pos, numdelims));
+					append_inlines(*last, new);
+					*last = new;
+					if (first_close_delims == 1 && numdelims > 2) {
+						numdelims = 2;
+					} else if (first_close_delims == 2) {
+						numdelims = 1;
+					} else if (numdelims == 3) {
+						// If we opened with ***, we interpret it as ** followed by *
+						// giving us <strong><em>
+						numdelims = 1;
+					}
+					subj->pos += numdelims;
+					if (first_close) {
+						first_head->tag = first_close_delims == 1 ? strong : emph;
+						chunk_free(&first_head->content.literal);
+						first_head->content.inlines =
+							make_inlines(first_close_delims == 1 ? emph : strong,
+									first_head->next);
+
+						il = first_head->next;
+						while (il->next && il->next != first_close) {
+							il = il->next;
+						}
+						il->next = NULL;
+
+						first_head->content.inlines->next = first_close->next;
+
+						il = first_head->content.inlines;
+						while (il->next && il->next != *last) {
+							il = il->next;
+						}
+						il->next = NULL;
+						free_inlines(*last);
+
+						first_close->next = NULL;
+						free_inlines(first_close);
+						first_head->next = NULL;
+						goto done;
+					} else {
+						first_close = *last;
+						first_close_delims = numdelims;
+					}
+				} else {
+					if (!parse_inline(subj, last)) {
+						goto done;
+					}
+				}
+			}
+			break;
+		default:
+			goto done;
+	}
+
+done:
+	free(last);
+	return result;
 }
 
 // Parse backslash-escape or just a backslash, returning an inline.
 static inl* handle_backslash(subject *subj)
 {
-  advance(subj);
-  unsigned char nextchar = peek_char(subj);
-  if (ispunct(nextchar)) {  // only ascii symbols and newline can be escaped
-    advance(subj);
-    return make_str(bformat("%c", nextchar));
-  } else if (nextchar == '\n') {
-    advance(subj);
-    return make_linebreak();
-  } else {
-    return make_str(bfromcstr("\\"));
-  }
+	advance(subj);
+	unsigned char nextchar = peek_char(subj);
+	if (ispunct(nextchar)) {  // only ascii symbols and newline can be escaped
+		advance(subj);
+		return make_str(chunk_buf(subj->buffer, subj->pos - 1, 1));
+	} else if (nextchar == '\n') {
+		advance(subj);
+		return make_linebreak();
+	} else {
+		return make_str(chunk_literal("\\"));
+	}
 }
 
 // Parse an entity or a regular "&" string.
 // Assumes the subject has an '&' character at the current position.
 static inl* handle_entity(subject* subj)
 {
-  int match;
-  inl * result;
-  match = scan_entity(subj->buffer, subj->pos);
-  if (match) {
-    result = make_entity(bmidstr(subj->buffer, subj->pos, match));
-    subj->pos += match;
-  } else {
-    advance(subj);
-    result = make_str(bfromcstr("&"));
-  }
-  return result;
+	int match;
+	inl *result;
+	match = scan_entity(subj->buffer, subj->pos);
+	if (match) {
+		result = make_entity(chunk_buf(subj->buffer, subj->pos, match));
+		subj->pos += match;
+	} else {
+		advance(subj);
+		result = make_str(chunk_literal("&"));
+	}
+	return result;
 }
 
 // Like make_str, but parses entities.
 // Returns an inline sequence consisting of str and entity elements.
-static inl * make_str_with_entities(bstring s)
-{
-  inl * result = NULL;
-  inl * new;
-  int searchpos;
-  char c;
-  subject * subj = make_subject(s, NULL);
-
-  while ((c = peek_char(subj))) {
-    switch (c) {
-    case '&':
-      new = handle_entity(subj);
-      break;
-    default:
-      searchpos = bstrchrp(subj->buffer, '&', subj->pos);
-      if (searchpos == BSTR_ERR) {
-        searchpos = blength(subj->buffer);
-      }
-      new = make_str(bmidstr(subj->buffer, subj->pos, searchpos - subj->pos));
-      subj->pos = searchpos;
-    }
-    result = append_inlines(result, new);
-  }
-  free(subj);
-  return result;
+static inl *make_str_with_entities(chunk *content)
+{
+	inl * result = NULL;
+	inl * new;
+	int searchpos;
+	subject subj;
+	gh_buf content_buf = GH_BUF_INIT;
+
+	gh_buf_set(&content_buf, content->data, content->len);
+	init_subject(&subj, &content_buf, 0, NULL);
+
+	while (peek_char(&subj)) {
+		gh_buf owned = GH_BUF_INIT;
+
+		if (peek_char(&subj) == '&') {
+			new = handle_entity(&subj);
+		} else {
+			searchpos = gh_buf_strchr(subj.buffer, '&', subj.pos);
+			if (searchpos < 0)
+				searchpos = gh_buf_len(subj.buffer);
+			new = make_str(chunk_buf(subj.buffer, subj.pos, searchpos - subj.pos));
+			subj.pos = searchpos;
+		}
+		// content_buf dies below: give the node its own copy of the bytes
+		gh_buf_set(&owned, new->content.literal.data, new->content.literal.len);
+		new->content.literal = chunk_buf_detach(&owned);
+		result = append_inlines(result, new);
+	}
+
+	gh_buf_free(&content_buf);
+	return result;
 }
 
 // Destructively unescape a string: remove backslashes before punctuation chars.
-extern int unescape(bstring url)
+extern void unescape_buffer(gh_buf *buf)
 {
-  // remove backslashes before punctuation chars:
-  int searchpos = 0;
-  while ((searchpos = bstrchrp(url, '\\', searchpos)) != BSTR_ERR) {
-    if (ispunct(bchar(url, searchpos + 1))) {
-      bdelete(url, searchpos, 1);
-    } else {
-      searchpos++;
-    }
-  }
-  return 0;
+	int r, w;
+
+	for (r = 0, w = 0; r < buf->size; ++r) {
+		// a backslash before punctuation is dropped; the escaped byte is kept
+		if (buf->ptr[r] == '\\' && r + 1 < buf->size && ispunct((unsigned char)buf->ptr[r + 1]))
+			r++;  // the copy below then takes the escaped byte verbatim
+		buf->ptr[w++] = buf->ptr[r];
+	}
+
+	gh_buf_truncate(buf, w);
 }
 
 // Clean a URL: remove surrounding whitespace and surrounding <>,
 // and remove \ that escape punctuation.
-static int clean_url(bstring url)
+static unsigned char *clean_url(chunk *url)
 {
-  // remove surrounding <> if any:
-  int urllength = blength(url);
-  btrimws(url);
-  if (bchar(url, 0) == '<' && bchar(url, urllength - 1) == '>') {
-    bdelete(url, 0, 1);
-    bdelete(url, urllength - 2, 1);
-  }
-  unescape(url);
-  return 0;
+	gh_buf buf = GH_BUF_INIT;
+
+	chunk_trim(url);
+	// strip one enclosing <...> pair; shorter input has no bytes to test
+	if (url->len >= 2 && url->data[0] == '<' && url->data[url->len - 1] == '>') {
+		gh_buf_set(&buf, url->data + 1, url->len - 2);
+	} else {
+		gh_buf_set(&buf, url->data, url->len);
+	}
+
+	unescape_buffer(&buf);
+	return gh_buf_detach(&buf);
 }
 
 // Clean a title: remove surrounding quotes and remove \ that escape punctuation.
-static int clean_title(bstring title)
+static unsigned char *clean_title(chunk *title)
 {
-  // remove surrounding quotes if any:
-  int titlelength = blength(title);
-  if ((bchar(title, 0) == '\'' && bchar(title, titlelength - 1) == '\'') ||
-      (bchar(title, 0) == '(' && bchar(title, titlelength - 1) == ')') ||
-      (bchar(title, 0) == '"' && bchar(title, titlelength - 1) == '"')) {
-    bdelete(title, 0, 1);
-    bdelete(title, titlelength - 2, 1);
-  }
-  unescape(title);
-  return 0;
+	gh_buf buf = GH_BUF_INIT;
+	// fewer than two bytes cannot hold a delimiter pair: compare against 0
+	unsigned char first = title->len >= 2 ? title->data[0] : 0;
+	unsigned char last = title->len >= 2 ? title->data[title->len - 1] : 0;
+	// remove surrounding quotes if any:
+	if ((first == '\'' && last == '\'') ||
+		(first == '(' && last == ')') ||
+		(first == '"' && last == '"')) {
+		gh_buf_set(&buf, title->data + 1, title->len - 2);
+	} else {
+		gh_buf_set(&buf, title->data, title->len);
+	}
+
+	unescape_buffer(&buf);
+	return gh_buf_detach(&buf);
 }
 
 // Parse an autolink or HTML tag.
 // Assumes the subject has a '<' character at the current position.
 static inl* handle_pointy_brace(subject* subj)
 {
-  int matchlen = 0;
-  bstring contents;
-  inl* result;
-
-  advance(subj);  // advance past first <
-  // first try to match a URL autolink
-  matchlen = scan_autolink_uri(subj->buffer, subj->pos);
-  if (matchlen > 0) {
-    contents = bmidstr(subj->buffer, subj->pos, matchlen - 1);
-    subj->pos += matchlen;
-    result =  make_link(make_str_with_entities(contents),
-                        bstrcpy(contents), bfromcstr(""));
-    bdestroy(contents);
-    return result;
-  }
-  // next try to match an email autolink
-  matchlen = scan_autolink_email(subj->buffer, subj->pos);
-  if (matchlen > 0) {
-    contents = bmidstr(subj->buffer, subj->pos, matchlen - 1);
-    subj->pos += matchlen;
-    result = make_link(make_str_with_entities(contents),
-                       bformat("mailto:%s", contents->data),
-                       bfromcstr(""));
-    bdestroy(contents);
-    return result;
-  }
-  // finally, try to match an html tag
-  matchlen = scan_html_tag(subj->buffer, subj->pos);
-  if (matchlen > 0) {
-    contents = bmidstr(subj->buffer, subj->pos, matchlen);
-    binsertch(contents, 0, 1, '<');
-    subj->pos += matchlen;
-    return make_raw_html(contents);
-  } else {// if nothing matches, just return the opening <:
-    return make_str(bfromcstr("<"));
-  }
+	int matchlen = 0;
+	chunk contents;
+
+	advance(subj);  // advance past first <
+
+	// first try to match a URL autolink
+	matchlen = scan_autolink_uri(subj->buffer, subj->pos);
+	if (matchlen > 0) {
+		contents = chunk_buf(subj->buffer, subj->pos, matchlen - 1);
+		subj->pos += matchlen;
+
+		return make_link(
+			make_str_with_entities(&contents),
+			contents,
+			chunk_literal("")
+		);
+	}
+
+	// next try to match an email autolink
+	matchlen = scan_autolink_email(subj->buffer, subj->pos);
+	if (matchlen > 0) {
+		gh_buf mail_url = GH_BUF_INIT;
+		inl *mail_link;
+		contents = chunk_buf(subj->buffer, subj->pos, matchlen - 1);
+		subj->pos += matchlen;
+		gh_buf_puts(&mail_url, "mailto:");
+		gh_buf_put(&mail_url, contents.data, contents.len);
+
+		// make_linkable() stores its own copy of the URL, so pass a
+		// borrowed view and release the scratch buffer afterwards
+		mail_link = make_link(make_str_with_entities(&contents),
+				chunk_buf(&mail_url, 0, mail_url.size), chunk_literal(""));
+		gh_buf_free(&mail_url);
+		return mail_link;
+	}
+
+	// finally, try to match an html tag
+	matchlen = scan_html_tag(subj->buffer, subj->pos);
+	if (matchlen > 0) {
+		contents = chunk_buf(subj->buffer, subj->pos - 1, matchlen + 1);
+		subj->pos += matchlen;
+		return make_raw_html(contents);
+	}
+
+	// if nothing matches, just return the opening <:
+	return make_str(chunk_literal("<"));
 }
 
 // Parse a link label.  Returns 1 if successful.
@@ -641,366 +731,381 @@ static inl* handle_pointy_brace(subject* subj)
 // markers. So, 2 below contains a link while 1 does not:
 // 1. [a link `with a ](/url)` character
 // 2. [a link *with emphasized ](/url) text*
-static int link_label(subject* subj, bstring* raw_label)
-{
-  int nestlevel = 0;
-  inl* tmp = NULL;
-  bstring raw;
-  int startpos = subj->pos;
-  if (subj->label_nestlevel) {
-    // if we've already checked to the end of the subject
-    // for a label, even with a different starting [, we
-    // know we won't find one here and we can just return.
-    // Note:  nestlevel 1 would be: [foo [bar]
-    // nestlevel 2 would be: [foo [bar [baz]
-    subj->label_nestlevel--;
-    return 0;
-  }
-  advance(subj);  // advance past [
-  char c;
-  while ((c = peek_char(subj)) && (c != ']' || nestlevel > 0)) {
-    switch (c) {
-    case '`':
-      tmp = handle_backticks(subj);
-      free_inlines(tmp);
-      break;
-    case '<':
-      tmp = handle_pointy_brace(subj);
-      free_inlines(tmp);
-      break;
-    case '[':  // nested []
-      nestlevel++;
-      advance(subj);
-      break;
-    case ']':  // nested []
-      nestlevel--;
-      advance(subj);
-      break;
-    case '\\':
-      advance(subj);
-      if (ispunct(peek_char(subj))) {
-        advance(subj);
-      }
-      break;
-    default:
-      advance(subj);
-    }
-  }
-  if (c == ']') {
-    if (raw_label != NULL) {
-      raw = bmidstr(subj->buffer, startpos + 1, subj->pos - (startpos + 1));
-      *raw_label = raw;
-    }
-    subj->label_nestlevel = 0;
-    advance(subj);  // advance past ]
-    return 1;
-  } else {
-    if (c == 0) {
-      subj->label_nestlevel = nestlevel;
-    }
-    subj->pos = startpos; // rewind
-    return 0;
-  }
+static int link_label(subject* subj, chunk *raw_label)  // *raw_label is written only on success and is dereferenced unconditionally, so it must not be NULL
+{
+	int nestlevel = 0;
+	inl* tmp = NULL;
+	int startpos = subj->pos;  // saved so a failed scan can rewind
+
+	if (subj->label_nestlevel) {
+		// if we've already checked to the end of the subject
+		// for a label, even with a different starting [, we
+		// know we won't find one here and we can just return.
+		// Note:  nestlevel 1 would be: [foo [bar]
+		// nestlevel 2 would be: [foo [bar [baz]
+		subj->label_nestlevel--;
+		return 0;
+	}
+
+	advance(subj);  // advance past [
+	char c;
+	while ((c = peek_char(subj)) && (c != ']' || nestlevel > 0)) {  // stop at end of input or at the ] that closes the outermost [
+		switch (c) {
+			case '`':
+				tmp = handle_backticks(subj);  // called only to move subj past the construct
+				free_inlines(tmp);  // the parsed inline itself is discarded
+				break;
+			case '<':
+				tmp = handle_pointy_brace(subj);  // same: consume, then discard the result
+				free_inlines(tmp);
+				break;
+			case '[':  // nested []
+				nestlevel++;
+				advance(subj);
+				break;
+			case ']':  // nested []
+				nestlevel--;
+				advance(subj);
+				break;
+			case '\\':
+				advance(subj);  // skip the backslash
+				if (ispunct(peek_char(subj))) {  // and the punctuation it escapes, so an escaped ] or [ is not counted
+					advance(subj);
+				}
+				break;
+			default:
+				advance(subj);
+		}
+	}
+	if (c == ']') {
+		*raw_label = chunk_buf(  // label text between the brackets, excluding [ and ]
+			subj->buffer,
+			startpos + 1,
+			subj->pos - (startpos + 1)
+		);
+
+		subj->label_nestlevel = 0;
+		advance(subj);  // advance past ]
+		return 1;
+	} else {
+		if (c == 0) {  // ran off the end of the input without closing
+			subj->label_nestlevel = nestlevel;  // remembered so later calls can take the early return above
+		}
+		subj->pos = startpos; // rewind
+		return 0;
+	}
 }
 
 // Parse a link or the link portion of an image, or return a fallback.
 static inl* handle_left_bracket(subject* subj)
 {
-  inl* lab = NULL;
-  inl* result = NULL;
-  reference* ref;
-  int n;
-  int sps;
-  int found_label;
-  int endlabel, starturl, endurl, starttitle, endtitle, endall;
-  bstring url, title, rawlabel, reflabel;
-  bstring rawlabel2 = NULL;
-  found_label = link_label(subj, &rawlabel);
-  endlabel = subj->pos;
-  if (found_label) {
-    if (peek_char(subj) == '(' &&
-        ((sps = scan_spacechars(subj->buffer, subj->pos + 1)) > -1) &&
-        ((n = scan_link_url(subj->buffer, subj->pos + 1 + sps)) > -1)) {
-      // try to parse an explicit link:
-      starturl = subj->pos + 1 + sps; // after (
-      endurl = starturl + n;
-      starttitle = endurl + scan_spacechars(subj->buffer, endurl);
-      // ensure there are spaces btw url and title
-      endtitle = (starttitle == endurl) ? starttitle :
-                 starttitle + scan_link_title(subj->buffer, starttitle);
-      endall = endtitle + scan_spacechars(subj->buffer, endtitle);
-      if (bchar(subj->buffer, endall) == ')') {
-        subj->pos = endall + 1;
-        url = bmidstr(subj->buffer, starturl, endurl - starturl);
-        clean_url(url);
-        title = bmidstr(subj->buffer, starttitle, endtitle - starttitle);
-        clean_title(title);
-        lab = parse_inlines(rawlabel, NULL);
-        bdestroy(rawlabel);
-        return make_link(lab, url, title);
-      } else {
-        // if we get here, we matched a label but didn't get further:
-        subj->pos = endlabel;
-        lab = parse_inlines(rawlabel, subj->reference_map);
-        bdestroy(rawlabel);
-        result = append_inlines(make_str(bfromcstr("[")),
-                                append_inlines(lab,
-                                               make_str(bfromcstr("]"))));
-        return result;
-      }
-    } else {
-      // Check for reference link.
-      // First, see if there's another label:
-      subj->pos = subj->pos + scan_spacechars(subj->buffer, endlabel);
-      reflabel = rawlabel;
-      // if followed by a nonempty link label, we change reflabel to it:
-      if (peek_char(subj) == '[' &&
-          link_label(subj, &rawlabel2)) {
-        if (blength(rawlabel2) > 0) {
-          reflabel = rawlabel2;
-        }
-      } else {
-        subj->pos = endlabel;
-      }
-      // lookup rawlabel in subject->reference_map:
-      ref = lookup_reference(subj->reference_map, reflabel);
-      if (ref != NULL) { // found
-        lab = parse_inlines(rawlabel, NULL);
-        result = make_link(lab, bstrcpy(ref->url), bstrcpy(ref->title));
-      } else {
-        subj->pos = endlabel;
-        lab = parse_inlines(rawlabel, subj->reference_map);
-        result = append_inlines(make_str(bfromcstr("[")),
-                               append_inlines(lab, make_str(bfromcstr("]"))));
-      }
-      bdestroy(rawlabel);
-      bdestroy(rawlabel2);
-      return result;
-    }
-  }
-  // If we fall through to here, it means we didn't match a link:
-  advance(subj);  // advance past [
-  return make_str(bfromcstr("["));
+	inl *lab = NULL;
+	inl *result = NULL;
+	reference *ref;
+	int n;
+	int sps;
+	int found_label;
+	int endlabel, starturl, endurl, starttitle, endtitle, endall;
+
+	chunk rawlabel;  // text inside the first [...]; set by link_label only on success
+	chunk url, title;
+
+	found_label = link_label(subj, &rawlabel);
+	endlabel = subj->pos;  // just past the label's closing ] when a label was found
+
+	if (found_label) {
+		if (peek_char(subj) == '(' &&
+				((sps = scan_spacechars(subj->buffer, subj->pos + 1)) > -1) &&
+				((n = scan_link_url(subj->buffer, subj->pos + 1 + sps)) > -1)) {
+
+			// try to parse an explicit link:
+			starturl = subj->pos + 1 + sps; // after (
+			endurl = starturl + n;
+			starttitle = endurl + scan_spacechars(subj->buffer, endurl);
+
+			// ensure there are spaces btw url and title
+			endtitle = (starttitle == endurl) ? starttitle :
+				starttitle + scan_link_title(subj->buffer, starttitle);
+
+			endall = endtitle + scan_spacechars(subj->buffer, endtitle);
+
+			if (gh_buf_at(subj->buffer, endall) == ')') {  // an explicit link must end with )
+				subj->pos = endall + 1;
+
+				url = chunk_buf(subj->buffer, starturl, endurl - starturl);
+				title = chunk_buf(subj->buffer, starttitle, endtitle - starttitle);
+				lab = parse_chunk_inlines(&rawlabel, NULL);  // label text is parsed with a NULL reference map
+
+				return make_link(lab, url, title);
+			} else {
+				// if we get here, we matched a label but didn't get further:
+				subj->pos = endlabel;
+				lab = parse_chunk_inlines(&rawlabel, subj->reference_map);
+				result = append_inlines(make_str(chunk_literal("[")),
+						append_inlines(lab,
+							make_str(chunk_literal("]"))));
+				return result;
+			}
+		} else {
+			chunk rawlabel_tmp;  // text of an optional second [...] label
+			chunk reflabel;  // the label that is actually looked up
+
+			// Check for reference link.
+			// First, see if there's another label:
+			subj->pos = subj->pos + scan_spacechars(subj->buffer, endlabel);
+			reflabel = rawlabel;  // default: look up the first label itself
+
+			// if followed by a nonempty link label, we change reflabel to it:
+			if (peek_char(subj) == '[' && link_label(subj, &rawlabel_tmp)) {
+				if (rawlabel_tmp.len > 0)
+					reflabel = rawlabel_tmp;
+			} else {
+				subj->pos = endlabel;  // no second label: undo the space skipping
+			}
+
+			// look up reflabel (second label if nonempty, else the first) in subject->reference_map:
+			ref = lookup_reference(subj->reference_map, &reflabel);
+			if (ref != NULL) { // found
+				lab = parse_chunk_inlines(&rawlabel, NULL);
+				result = make_link(lab, chunk_literal(ref->url), chunk_literal(ref->title));  // NOTE(review): presumably ref->url / ref->title are NUL-terminated strings owned by the reference - confirm
+			} else {
+				subj->pos = endlabel;  // not a link: continue right after the first label
+				lab = parse_chunk_inlines(&rawlabel, subj->reference_map);
+				result = append_inlines(make_str(chunk_literal("[")),
+						append_inlines(lab, make_str(chunk_literal("]"))));
+			}
+			return result;
+		}
+	}
+	// If we fall through to here, it means we didn't match a link:
+	advance(subj);  // advance past [
+	return make_str(chunk_literal("["));
 }
 
 // Parse a hard or soft linebreak, returning an inline.
 // Assumes the subject has a newline at the current position.
 static inl* handle_newline(subject *subj)
 {
-  int nlpos = subj->pos;
-  // skip over newline
-  advance(subj);
-  // skip spaces at beginning of line
-  while (peek_char(subj) == ' ') {
-    advance(subj);
-  }
-  if (nlpos > 1 &&
-      bchar(subj->buffer, nlpos - 1) == ' ' &&
-      bchar(subj->buffer, nlpos - 2) == ' ') {
-    return make_linebreak();
-  } else {
-    return make_softbreak();
-  }
+	int nlpos = subj->pos;  // index of the newline itself
+	// skip over newline
+	advance(subj);
+	// skip spaces at beginning of line
+	while (peek_char(subj) == ' ') {
+		advance(subj);
+	}
+	if (nlpos > 1 &&  // need two bytes before the newline to inspect
+			gh_buf_at(subj->buffer, nlpos - 1) == ' ' &&
+			gh_buf_at(subj->buffer, nlpos - 2) == ' ') {  // two spaces directly before the newline: hard break
+		return make_linebreak();
+	} else {
+		return make_softbreak();
+	}
 }
 
 inline static int not_eof(subject* subj)
 {
-  return !is_eof(subj);
+	return !is_eof(subj);  // negation of is_eof(); passed as the continue-predicate to parse_inlines_while
 }
 
 // Parse inlines while a predicate is satisfied.  Return inlines.
 extern inl* parse_inlines_while(subject* subj, int (*f)(subject*))
 {
-  inl* result = NULL;
-  inl** last = &result;
-  while ((*f)(subj) && parse_inline(subj, last)) {
-  }
-  return result;
+	inl* result = NULL;  // head of the list; stays NULL if nothing is parsed
+	inl** last = &result;  // parse_inline stores the first inline here and appends later ones to it
+	while ((*f)(subj) && parse_inline(subj, last)) {  // stop when f() returns 0 or parse_inline returns 0
+	}
+	return result;
+}
+
+inl *parse_chunk_inlines(chunk *chunk, reference** refmap)  // parse the bytes of one chunk as inlines, using refmap (may be NULL)
+{
+	inl *result;
+	subject subj;
+	gh_buf full_chunk = GH_BUF_INIT;
+
+	gh_buf_set(&full_chunk, chunk->data, chunk->len);  // private copy of the chunk's bytes
+	init_subject(&subj, &full_chunk, 0, refmap);
+	result = parse_inlines_while(&subj, not_eof);
+
+	gh_buf_free(&full_chunk);  // NOTE(review): if the inlines in result hold chunk_buf() views into full_chunk they dangle after this free - confirm
+	return result;
+}
+
+static int find_special_char(subject *subj)  // index of the first special char after subj->pos, or -1 if none
+{
+	int n = subj->pos + 1;  // the char at pos is not examined; the scan starts after it
+	int size = (int)gh_buf_len(subj->buffer);
+
+	for (; n < size; n++) {  // n must advance on every miss, otherwise a non-special char loops forever
+		if (strchr("\n\\`&_*[]<!", gh_buf_at(subj->buffer, n)))  // strchr also matches a 0 byte (the terminator)
+			return n;
+	}
+
+	return -1;
 }
 
 // Parse an inline, advancing subject, and add it to last element.
 // Adjust tail to point to new last element of list.
 // Return 0 if no inline can be parsed, 1 otherwise.
-extern int parse_inline(subject* subj, inl ** last)
-{
-  inl* new = NULL;
-  bstring contents;
-  bstring special_chars;
-  unsigned char c;
-  int endpos;
-  c = peek_char(subj);
-  if (c == 0) {
-    return 0;
-  }
-  switch(c){
-    case '\n':
-      new = handle_newline(subj);
-      break;
-    case '`':
-      new = handle_backticks(subj);
-      break;
-    case '\\':
-      new = handle_backslash(subj);
-      break;
-    case '&':
-      new = handle_entity(subj);
-      break;
-    case '<':
-      new = handle_pointy_brace(subj);
-      break;
-    case '_':
-      if (subj->pos > 0 && (isalnum(bchar(subj->buffer, subj->pos - 1)) ||
-                            bchar(subj->buffer, subj->pos - 1) == '_')) {
-        new = make_str(take_one(subj));
-      } else {
-        new = handle_strong_emph(subj, '_');
-      }
-      break;
-    case '*':
-      new = handle_strong_emph(subj, '*');
-      break;
-    case '[':
-      new = handle_left_bracket(subj);
-      break;
-    case '!':
-      advance(subj);
-      if (peek_char(subj) == '[') {
-        new = handle_left_bracket(subj);
-        if (new != NULL && new->tag == link) {
-          new->tag = image;
-        } else {
-          new = append_inlines(make_str(bfromcstr("!")), new);
-        }
-      } else {
-        new = make_str(bfromcstr("!"));
-      }
-      break;
-    default:
-      // we read until we hit a special character
-      special_chars = bfromcstr("\n\\`&_*[]<!");
-      endpos = binchr(subj->buffer, subj->pos, special_chars);
-      bdestroy(special_chars);
-      if (endpos == subj->pos) {
-        // current char is special: read a 1-character str
-        contents = take_one(subj);
-      } else if (endpos == BSTR_ERR) {
-        // special char not found, take whole rest of buffer:
-        endpos = subj->buffer->slen;
-        contents = bmidstr(subj->buffer, subj->pos, endpos - subj->pos);
-        subj->pos = endpos;
-      } else {
-        // take buffer from subj->pos to endpos to str.
-        contents = bmidstr(subj->buffer, subj->pos, endpos - subj->pos);
-        subj->pos = endpos;
-        // if we're at a newline, strip trailing spaces.
-        if (peek_char(subj) == '\n') {
-          brtrimws(contents);
-        }
-      }
-      new = make_str(contents);
-  }
-  if (*last == NULL) {
-    *last = new;
-  } else {
-    append_inlines(*last, new);
-  }
-  return 1;
-}
-
-extern inl* parse_inlines(bstring input, reference** refmap)
-{
-  subject * subj = make_subject(input, refmap);
-  inl * result = parse_inlines_while(subj, not_eof);
-  free(subj);
-  return result;
+static int parse_inline(subject* subj, inl ** last)
+{
+	inl* new = NULL;
+	chunk contents;
+	unsigned char c;
+	int endpos;
+	c = peek_char(subj);
+	if (c == 0) {  // peek_char gave 0: nothing can be parsed
+		return 0;
+	}
+	switch(c){  // dispatch on the current character
+		case '\n':
+			new = handle_newline(subj);
+			break;
+		case '`':
+			new = handle_backticks(subj);
+			break;
+		case '\\':
+			new = handle_backslash(subj);
+			break;
+		case '&':
+			new = handle_entity(subj);
+			break;
+		case '<':
+			new = handle_pointy_brace(subj);
+			break;
+		case '_':
+			if (subj->pos > 0 && (isalnum(gh_buf_at(subj->buffer, subj->pos - 1)) ||
+						gh_buf_at(subj->buffer, subj->pos - 1) == '_')) {  // '_' right after an alphanumeric or another '_'
+				goto text_literal;  // is taken as plain text, not as an emphasis marker
+			}
+
+			new = handle_strong_emph(subj, '_');
+			break;
+		case '*':
+			new = handle_strong_emph(subj, '*');
+			break;
+		case '[':
+			new = handle_left_bracket(subj);
+			break;
+		case '!':
+			advance(subj);  // consume the '!'
+			if (peek_char(subj) == '[') {
+				new = handle_left_bracket(subj);
+				if (new != NULL && new->tag == link) {
+					new->tag = image;  // a link directly after '!' is retagged as an image
+				} else {
+					new = append_inlines(make_str(chunk_literal("!")), new);  // otherwise keep a literal '!' in front
+				}
+			} else {
+				new = make_str(chunk_literal("!"));
+			}
+			break;
+		default:
+		text_literal:
+			endpos = find_special_char(subj);  // search starts after the current char, so at least one char is taken
+			if (endpos < 0) {  // no special char ahead: take the rest of the buffer
+				endpos = gh_buf_len(subj->buffer);
+			}
+
+			contents = chunk_buf(subj->buffer, subj->pos, endpos - subj->pos);
+			subj->pos = endpos;
+
+			// if we're at a newline, strip trailing spaces.
+			if (peek_char(subj) == '\n') {
+				chunk_trim(&contents);
+			}
+
+			new = make_str(contents);
+	}
+	if (*last == NULL) {  // first inline: it becomes the list itself
+		*last = new;
+	} else {
+		append_inlines(*last, new);
+	}
+	return 1;
+}
+
+extern inl* parse_inlines(gh_buf *input, int input_pos, reference** refmap)  // parse inlines from input until not_eof() fails
+{
+	subject subj;  // lives on the stack, so there is nothing to free afterwards
+	init_subject(&subj, input, input_pos, refmap);  // input_pos is handed to init_subject (presumably the start offset - confirm)
+	return parse_inlines_while(&subj, not_eof);
 }
 
 // Parse zero or more space characters, including at most one newline.
 void spnl(subject* subj)
 {
-  bool seen_newline = false;
-  while (peek_char(subj) == ' ' ||
-         (!seen_newline &&
-          (seen_newline = peek_char(subj) == '\n'))) {
-    advance(subj);
-  }
+	bool seen_newline = false;
+	while (peek_char(subj) == ' ' ||  // spaces are always consumed
+			(!seen_newline &&  // the newline test is tried only until one newline has been taken
+			 (seen_newline = peek_char(subj) == '\n'))) {  // assignment records whether this char is that newline
+		advance(subj);
+	}
 }
 
 // Parse reference.  Assumes string begins with '[' character.
 // Modify refmap if a reference is encountered.
 // Return 0 if no reference found, otherwise position of subject
 // after reference is parsed.
-extern int parse_reference(bstring input, reference** refmap)
-{
-  subject * subj = make_subject(input, NULL);
-  bstring lab = NULL;
-  bstring url = NULL;
-  bstring title = NULL;
-  int matchlen = 0;
-  int beforetitle;
-  reference * new = NULL;
-  int newpos;
-
-  // parse label:
-  if (!link_label(subj, &lab)) {
-    free(subj);
-    return 0;
-  }
-  // colon:
-  if (peek_char(subj) == ':') {
-    advance(subj);
-  } else {
-    free(subj);
-    bdestroy(lab);
-    return 0;
-  }
-  // parse link url:
-  spnl(subj);
-  matchlen = scan_link_url(subj->buffer, subj->pos);
-  if (matchlen) {
-    url = bmidstr(subj->buffer, subj->pos, matchlen);
-    clean_url(url);
-    subj->pos += matchlen;
-  } else {
-    free(subj);
-    bdestroy(lab);
-    bdestroy(url);
-    return 0;
-  }
-  // parse optional link_title
-  beforetitle = subj->pos;
-  spnl(subj);
-  matchlen = scan_link_title(subj->buffer, subj->pos);
-  if (matchlen) {
-    title = bmidstr(subj->buffer, subj->pos, matchlen);
-    clean_title(title);
-    subj->pos += matchlen;
-  } else {
-    subj->pos = beforetitle;
-    title = bfromcstr("");
-  }
-  // parse final spaces and newline:
-  while (peek_char(subj) == ' ') {
-    advance(subj);
-  }
-  if (peek_char(subj) == '\n') {
-    advance(subj);
-  } else if (peek_char(subj) != 0) {
-    free(subj);
-    bdestroy(lab);
-    bdestroy(url);
-    bdestroy(title);
-    return 0;
-  }
-  // insert reference into refmap
-  new = make_reference(lab, url, title);
-  add_reference(refmap, new);
-
-  newpos = subj->pos;
-  free(subj);
-  bdestroy(lab);
-  bdestroy(url);
-  bdestroy(title);
-  return newpos;
+extern int parse_reference(gh_buf *input, int input_pos, reference** refmap)
+{
+	subject subj;
+
+	chunk lab;
+	chunk url;
+	chunk title;
+
+	int matchlen = 0;
+	int beforetitle;
+	reference * new = NULL;
+
+	init_subject(&subj, input, input_pos, NULL);  // the scanning subject gets a NULL reference map
+
+	// parse label:
+	if (!link_label(&subj, &lab))
+		return 0;
+
+	// colon:
+	if (peek_char(&subj) == ':') {
+		advance(&subj);
+	} else {
+		return 0;
+	}
+
+	// parse link url:
+	spnl(&subj);
+	matchlen = scan_link_url(subj.buffer, subj.pos);
+	if (matchlen) {
+		url = chunk_buf(subj.buffer, subj.pos, matchlen);
+		subj.pos += matchlen;
+	} else {
+		return 0;  // a reference without a url is rejected
+	}
+
+	// parse optional link_title
+	beforetitle = subj.pos;  // restore point in case no title follows
+	spnl(&subj);
+	matchlen = scan_link_title(subj.buffer, subj.pos);
+	if (matchlen) {
+		title = chunk_buf(subj.buffer, subj.pos, matchlen);
+		subj.pos += matchlen;
+	} else {
+		subj.pos = beforetitle;  // undo the spnl() skip
+		title = chunk_literal("");  // an absent title becomes the empty string
+	}
+	// parse final spaces and newline:
+	while (peek_char(&subj) == ' ') {
+		advance(&subj);
+	}
+	if (peek_char(&subj) == '\n') {
+		advance(&subj);
+	} else if (peek_char(&subj) != 0) {  // anything else before end of line/input: not a reference
+		return 0;
+	}
+	// insert reference into refmap
+	new = make_reference(&lab, &url, &title);  // NOTE(review): presumably make_reference copies what it needs from these stack chunks - confirm
+	add_reference(refmap, new);
+
+	return subj.pos;  // subject position after the reference
 }
 
-- 
cgit v1.2.3


From 582674e662d1f8757350c51486a5e0a837195e15 Mon Sep 17 00:00:00 2001
From: Vicent Marti <tanoku@gmail.com>
Date: Tue, 2 Sep 2014 13:18:04 +0200
Subject: ffffix

---
 Makefile      |  11 ++-
 src/blocks.c  |  58 +++++++----
 src/buffer.c  |  69 +++++--------
 src/buffer.h  |  19 ++--
 src/html.c    | 276 ----------------------------------------------------
 src/inlines.c |   4 +-
 src/main.c    | 142 ++++++++++++---------------
 src/print.c   | 307 ++++++++++++++++++++++++++++++----------------------------
 src/stmd.h    |  13 +--
 src/utf8.c    |   6 +-
 src/utf8.h    |   6 --
 11 files changed, 304 insertions(+), 607 deletions(-)
 delete mode 100644 src/html.c
 delete mode 100644 src/utf8.h

(limited to 'src/inlines.c')

diff --git a/Makefile b/Makefile
index cb5938d..d14a928 100644
--- a/Makefile
+++ b/Makefile
@@ -6,7 +6,7 @@ DATADIR=data
 PROG=./stmd
 
 .PHONY: all oldtests test spec benchjs testjs
-all: $(SRCDIR)/case_fold_switch.c $(PROG)
+all: $(SRCDIR)/case_fold_switch.inc $(PROG)
 
 README.html: README.md template.html
 	pandoc --template template.html -S -s -t html5 -o $@ $<
@@ -41,13 +41,16 @@ testjs: spec.txt
 benchjs:
 	node js/bench.js ${BENCHINP}
 
-$(PROG): $(SRCDIR)/main.c $(SRCDIR)/inlines.o $(SRCDIR)/buffer.o $(SRCDIR)/blocks.o $(SRCDIR)/scanners.c $(SRCDIR)/print.o $(SRCDIR)/html.o $(SRCDIR)/utf8.o
+HTML_OBJ=$(SRCDIR)/html/html.o $(SRCDIR)/html/houdini_href_e.o $(SRCDIR)/html/houdini_html_e.o
+STMD_OBJ=$(SRCDIR)/inlines.o $(SRCDIR)/buffer.o $(SRCDIR)/blocks.o $(SRCDIR)/scanners.c $(SRCDIR)/print.o $(SRCDIR)/utf8.o
+
+$(PROG): $(SRCDIR)/main.c $(HTML_OBJ) $(STMD_OBJ)
 	$(CC) $(LDFLAGS) -o $@ $^
 
 $(SRCDIR)/scanners.c: $(SRCDIR)/scanners.re
 	re2c --case-insensitive -bis $< > $@ || (rm $@ && false)
 
-$(SRCDIR)/case_fold_switch.inc $(DATADIR)/CaseFolding-3.2.0.txt
+$(SRCDIR)/case_fold_switch.inc: $(DATADIR)/CaseFolding-3.2.0.txt
 	perl mkcasefold.pl < $< > $@
 
 .PHONY: leakcheck clean fuzztest dingus upload
@@ -72,7 +75,7 @@ update-site: spec.html narrative.html
 	(cd _site ; git pull ; git commit -a -m "Updated site for latest spec, narrative, js" ; git push; cd ..)
 
 clean:
-	-rm -f test $(SRCDIR)/*.o $(SRCDIR)/scanners.c
+	-rm -f test $(SRCDIR)/*.o $(SRCDIR)/scanners.c $(SRCDIR)/html/*.o
 	-rm -rf *.dSYM
 	-rm -f README.html
 	-rm -f spec.md fuzz.txt spec.html
diff --git a/src/blocks.c b/src/blocks.c
index eabac03..71dc830 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -3,11 +3,12 @@
 #include <stdio.h>
 #include <stdbool.h>
 #include <ctype.h>
-#include "bstrlib.h"
+
 #include "stmd.h"
-#include "uthash.h"
-#include "debug.h"
 #include "scanners.h"
+#include "uthash.h"
+
+static void finalize(block* b, int line_number);
 
 static block* make_block(int tag, int start_line, int start_column)
 {
@@ -140,7 +141,7 @@ static int break_out_of_lists(block ** bptr, int line_number)
 }
 
 
-extern void finalize(block* b, int line_number)
+static void finalize(block* b, int line_number)
 {
 	int firstlinelen;
 	int pos;
@@ -364,7 +365,7 @@ static int lists_match(struct ListData list_data,
 			list_data.bullet_char == item_data.bullet_char);
 }
 
-static void expand_tabs(gh_buf *ob, const char *line, size_t size)
+static void expand_tabs(gh_buf *ob, const unsigned char *line, size_t size)
 {
 	size_t  i = 0, tab = 0;
 
@@ -389,13 +390,43 @@ static void expand_tabs(gh_buf *ob, const char *line, size_t size)
 	}
 }
 
-extern block *stmd_parse_document(const char *buffer, size_t len)
+static block *finalize_document(block *document, int linenum)  // named to match the calls in stmd_parse_file and stmd_parse_document
 {
-	gh_buf line = GH_BUF_INIT;
+	while (document != document->top) {  // walk up via ->parent until the top block, finalizing each one
+		finalize(document, linenum);
+		document = document->parent;
+	}
+
+	finalize(document, linenum);  // finally the top block itself
+	process_inlines(document, document->attributes.refmap);  // inline pass uses the refmap stored on the top block
+
+	return document;  // the top block
+}
 
+extern block *stmd_parse_file(FILE *f)  // read f with fgets(), feed each piece to incorporate_line(), then finalize
+{
+	gh_buf line = GH_BUF_INIT;  // reused for every piece; cleared inside the loop, freed after it
+	unsigned char buffer[4096];
+	int linenum = 1;
 	block *document = make_document();
+
+	while (fgets((char *)buffer, sizeof(buffer), f)) {
+		expand_tabs(&line, buffer, strlen((char *)buffer));  // strlen() takes const char *, so cast like the fgets() call above
+		incorporate_line(&line, linenum, &document);
+		gh_buf_clear(&line);
+		linenum++;  // NOTE(review): a physical line longer than the buffer arrives in several fgets() pieces, each counted here
+	}
+
+	gh_buf_free(&line);
+	return finalize_document(document, linenum);
+}
+
+extern block *stmd_parse_document(const unsigned char *buffer, size_t len)
+{
+	gh_buf line = GH_BUF_INIT;
 	int linenum = 1;
-	const char *end = buffer + len;
+	const unsigned char *end = buffer + len;
+	block *document = make_document();
 
 	while (buffer < end) {
 		const char *eol = memchr(buffer, '\n', end - buffer);
@@ -414,16 +445,7 @@ extern block *stmd_parse_document(const char *buffer, size_t len)
 	}
 
 	gh_buf_free(&line);
-
-	while (document != document->top) {
-		finalize(document, linenum);
-		document = document->parent;
-	}
-
-	finalize(document, linenum);
-	process_inlines(document, document->attributes.refmap);
-
-	return document;
+	return finalize_document(document, linenum);
 }
 
 // Process one line at a time, modifying a block.
diff --git a/src/buffer.c b/src/buffer.c
index b81e7fa..17dc864 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -31,10 +31,10 @@ void gh_buf_init(gh_buf *buf, int initial_size)
 
 int gh_buf_try_grow(gh_buf *buf, int target_size, bool mark_oom)
 {
-	char *new_ptr;
-	size_t new_size;
+	unsigned char *new_ptr;
+	int new_size;
 
-	if (buf->ptr == gh_buf__oom || buf->asize < 0)
+	if (buf->ptr == gh_buf__oom)
 		return -1;
 
 	if (target_size <= buf->asize)
@@ -79,7 +79,7 @@ void gh_buf_free(gh_buf *buf)
 {
 	if (!buf) return;
 
-	if (buf->asize > 0 && buf->ptr != gh_buf__initbuf && buf->ptr != gh_buf__oom)
+	if (buf->ptr != gh_buf__initbuf && buf->ptr != gh_buf__oom)
 		free(buf->ptr);
 
 	gh_buf_init(buf, 0);
@@ -91,14 +91,9 @@ void gh_buf_clear(gh_buf *buf)
 
 	if (buf->asize > 0)
 		buf->ptr[0] = '\0';
-
-	if (buf->asize < 0) {
-		buf->ptr = gh_buf__initbuf;
-		buf->asize = 0;
-	}
 }
 
-int gh_buf_set(gh_buf *buf, const char *data, int len)
+int gh_buf_set(gh_buf *buf, const unsigned char *data, int len)
 {
 	if (len == 0 || data == NULL) {
 		gh_buf_clear(buf);
@@ -115,10 +110,12 @@ int gh_buf_set(gh_buf *buf, const char *data, int len)
 
 int gh_buf_sets(gh_buf *buf, const char *string)
 {
-	return gh_buf_set(buf, string, string ? strlen(string) : 0);
+	return gh_buf_set(buf,
+		(const unsigned char *)string,	/* gh_buf_set() now takes unsigned char data */
+		string ? strlen(string) : 0);	/* a NULL string is passed on with len 0 */
 }
 
-int gh_buf_putc(gh_buf *buf, char c)
+int gh_buf_putc(gh_buf *buf, int c)
 {
 	ENSURE_SIZE(buf, buf->size + 2);
 	buf->ptr[buf->size++] = c;
@@ -126,7 +123,7 @@ int gh_buf_putc(gh_buf *buf, char c)
 	return 0;
 }
 
-int gh_buf_put(gh_buf *buf, const char *data, int len)
+int gh_buf_put(gh_buf *buf, const unsigned char *data, int len)
 {
 	ENSURE_SIZE(buf, buf->size + len + 1);
 	memmove(buf->ptr + buf->size, data, len);
@@ -137,8 +134,7 @@ int gh_buf_put(gh_buf *buf, const char *data, int len)
 
 int gh_buf_puts(gh_buf *buf, const char *string)
 {
-	assert(string);
-	return gh_buf_put(buf, string, strlen(string));
+	return gh_buf_put(buf, (const unsigned char *)string, strlen(string));	/* NOTE(review): string must not be NULL; strlen() is applied unconditionally now that the assert is gone */
 }
 
 int gh_buf_vprintf(gh_buf *buf, const char *format, va_list ap)
@@ -153,7 +149,7 @@ int gh_buf_vprintf(gh_buf *buf, const char *format, va_list ap)
 		va_copy(args, ap);
 
 		len = vsnprintf(
-			buf->ptr + buf->size,
+			(char *)buf->ptr + buf->size,
 			buf->asize - buf->size,
 			format, args
 		);
@@ -187,9 +183,9 @@ int gh_buf_printf(gh_buf *buf, const char *format, ...)
 	return r;
 }
 
-void gh_buf_copy_cstr(char *data, size_t datasize, const gh_buf *buf)
+void gh_buf_copy_cstr(char *data, int datasize, const gh_buf *buf)
 {
-	size_t copylen;
+	int copylen;
 
 	assert(data && datasize && buf);
 
@@ -212,9 +208,9 @@ void gh_buf_swap(gh_buf *buf_a, gh_buf *buf_b)
 	*buf_b = t;
 }
 
-char *gh_buf_detach(gh_buf *buf)
+unsigned char *gh_buf_detach(gh_buf *buf)
 {
-	char *data = buf->ptr;
+	unsigned char *data = buf->ptr;
 
 	if (buf->asize == 0 || buf->ptr == gh_buf__oom)
 		return NULL;
@@ -224,13 +220,13 @@ char *gh_buf_detach(gh_buf *buf)
 	return data;
 }
 
-void gh_buf_attach(gh_buf *buf, char *ptr, int asize)
+void gh_buf_attach(gh_buf *buf, unsigned char *ptr, int asize)
 {
 	gh_buf_free(buf);
 
 	if (ptr) {
 		buf->ptr = ptr;
-		buf->size = strlen(ptr);
+		buf->size = strlen((char *)ptr);
 		if (asize)
 			buf->asize = (asize < buf->size) ? buf->size + 1 : asize;
 		else /* pass 0 to fall back on strlen + 1 */
@@ -249,11 +245,11 @@ int gh_buf_cmp(const gh_buf *a, const gh_buf *b)
 
 int gh_buf_strchr(const gh_buf *buf, int c, int pos)
 {
-  const char *p = memchr(buf->ptr + pos, c, buf->size - pos);
-  if (!p)
-    return -1;
+	const unsigned char *p = memchr(buf->ptr + pos, c, buf->size - pos);	/* same element type as buf->ptr, so the subtraction below is valid */
+	if (!p)
+		return -1;	/* c does not occur at or after pos */
 
-  return (int)(p - p->ptr);
+	return (int)(p - buf->ptr);	/* offset of the match from the start of the buffer */
 }
 
 int gh_buf_strrchr(const gh_buf *buf, int c, int pos)
@@ -270,36 +266,21 @@ int gh_buf_strrchr(const gh_buf *buf, int c, int pos)
 
 void gh_buf_truncate(gh_buf *buf, size_t len)
 {
-	assert(buf->asize >= 0);
-
 	if (len < buf->size) {
 		buf->size = len;
 		buf->ptr[buf->size] = '\0';
 	}
 }
 
-void gh_buf_ltruncate(gh_buf *buf, size_t len)
-{
-	assert(buf->asize >= 0);
-
-	if (len && len < buf->size) {
-		memmove(buf->ptr, buf->ptr + len, buf->size - len);
-		buf->size -= len;
-		buf->ptr[buf->size] = '\0';
-	}
-}
-
 void gh_buf_trim(gh_buf *buf)
 {
-	size_t i = 0;
-
-	assert(buf->asize >= 0);
-
-	/* ltrim */
+	/* TODO: leading whitespace? */
+	/*
 	while (i < buf->size && isspace(buf->ptr[i]))
 		i++;
 
 	gh_buf_truncate(buf, i);
+	*/
 
 	/* rtrim */
 	while (buf->size > 0) {
diff --git a/src/buffer.h b/src/buffer.h
index 2581ee3..422ef02 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -24,13 +24,6 @@ extern unsigned char gh_buf__oom[];
  */
 extern void gh_buf_init(gh_buf *buf, int initial_size);
 
-static inline void gh_buf_static(gh_buf *buf, unsigned char *source)
-{
-	buf->ptr = source;
-	buf->size = strlen(source);
-	buf->asize = -1;
-}
-
 /**
  * Attempt to grow the buffer to hold at least `target_size` bytes.
  *
@@ -81,13 +74,13 @@ static inline size_t gh_buf_len(const gh_buf *buf)
 
 extern int gh_buf_cmp(const gh_buf *a, const gh_buf *b);
 
-extern void gh_buf_attach(gh_buf *buf, char *ptr, int asize);
-extern char *gh_buf_detach(gh_buf *buf);
+extern void gh_buf_attach(gh_buf *buf, unsigned char *ptr, int asize);
+extern unsigned char *gh_buf_detach(gh_buf *buf);
 extern void gh_buf_copy_cstr(char *data, int datasize, const gh_buf *buf);
 
 static inline const char *gh_buf_cstr(const gh_buf *buf)
 {
-	return buf->ptr;
+	return (char *)buf->ptr;	/* cast added so the pointer type matches the declared const char * return */
 }
 
 #define gh_buf_at(buf, n) ((buf)->ptr[n])
@@ -100,10 +93,10 @@ static inline const char *gh_buf_cstr(const gh_buf *buf)
  * return code of these functions and call them in a series then just call
  * gh_buf_oom at the end.
  */
-extern int gh_buf_set(gh_buf *buf, const char *data, int len);
+extern int gh_buf_set(gh_buf *buf, const unsigned char *data, int len);
 extern int gh_buf_sets(gh_buf *buf, const char *string);
-extern int gh_buf_putc(gh_buf *buf, char c);
-extern int gh_buf_put(gh_buf *buf, const char *data, int len);
+extern int gh_buf_putc(gh_buf *buf, int c);
+extern int gh_buf_put(gh_buf *buf, const unsigned char *data, int len);
 extern int gh_buf_puts(gh_buf *buf, const char *string);
 extern int gh_buf_printf(gh_buf *buf, const char *format, ...)
 	__attribute__((format (printf, 2, 3)));
diff --git a/src/html.c b/src/html.c
deleted file mode 100644
index aeec5f1..0000000
--- a/src/html.c
+++ /dev/null
@@ -1,276 +0,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-#include <stdbool.h>
-#include "bstrlib.h"
-#include "stmd.h"
-#include "debug.h"
-#include "scanners.h"
-
-// Functions to convert block and inline lists to HTML strings.
-
-// Escape special characters in HTML.  More efficient than
-// three calls to bfindreplace.  If preserve_entities is set,
-// existing entities are left alone.
-static bstring escape_html(bstring inp, bool preserve_entities)
-{
-  int pos = 0;
-  int match;
-  char c;
-  bstring escapable = blk2bstr("&<>\"", 4);
-  bstring ent;
-  bstring s = bstrcpy(inp);
-  while ((pos = binchr(s, pos, escapable)) != BSTR_ERR) {
-    c = bchar(s,pos);
-    switch (c) {
-    case '<':
-      bdelete(s, pos, 1);
-      ent = blk2bstr("&lt;", 4);
-      binsert(s, pos, ent, ' ');
-      bdestroy(ent);
-      pos += 4;
-      break;
-    case '>':
-      bdelete(s, pos, 1);
-      ent = blk2bstr("&gt;", 4);
-      binsert(s, pos, ent, ' ');
-      bdestroy(ent);
-      pos += 4;
-      break;
-    case '&':
-      if (preserve_entities && (match = scan_entity(s, pos))) {
-        pos += match;
-      } else {
-        bdelete(s, pos, 1);
-        ent = blk2bstr("&amp;", 5);
-        binsert(s, pos, ent, ' ');
-        bdestroy(ent);
-        pos += 5;
-      }
-      break;
-    case '"':
-      bdelete(s, pos, 1);
-      ent = blk2bstr("&quot;", 6);
-      binsert(s, pos, ent, ' ');
-      bdestroy(ent);
-      pos += 6;
-      break;
-    default:
-      bdelete(s, pos, 1);
-      log_err("unexpected character %02x", c);
-    }
-  }
-  bdestroy(escapable);
-  return s;
-}
-
-static inline void cr(bstring buffer)
-{
-  int c = bchar(buffer, blength(buffer) - 1);
-  if (c != '\n' && c) {
-    bconchar(buffer, '\n');
-  }
-}
-
-// Convert a block list to HTML.  Returns 0 on success, and sets result.
-extern int blocks_to_html(block* b, bstring* result, bool tight)
-{
-  bstring contents = NULL;
-  bstring escaped, escaped2;
-  struct bstrList * info_words;
-  struct ListData * data;
-  bstring mbstart;
-  bstring html = blk2bstr("", 0);
-
-  while(b != NULL) {
-    switch(b->tag) {
-    case document:
-      check(blocks_to_html(b->children, &contents, false) == 0,
-            "error converting blocks to html");
-      bformata(html, "%s", contents->data);
-      bdestroy(contents);
-      break;
-    case paragraph:
-      check(inlines_to_html(b->inline_content, &contents) == 0,
-            "error converting inlines to html");
-      if (tight) {
-        bformata(html, "%s", contents->data);
-      } else {
-        cr(html);
-        bformata(html, "<p>%s</p>", contents->data);
-        cr(html);
-      }
-      bdestroy(contents);
-      break;
-    case block_quote:
-      check(blocks_to_html(b->children, &contents, false) == 0,
-            "error converting blocks to html");
-      cr(html);
-      bformata(html, "<blockquote>\n%s</blockquote>", contents->data);
-      cr(html);
-      bdestroy(contents);
-      break;
-    case list_item:
-      check(blocks_to_html(b->children, &contents, tight) == 0,
-            "error converting blocks to html");
-      brtrimws(contents);
-      cr(html);
-      bformata(html, "<li>%s</li>", contents->data);
-      cr(html);
-      bdestroy(contents);
-      break;
-    case list:
-      // make sure a list starts at the beginning of the line:
-      cr(html);
-      data = &(b->attributes.list_data);
-      check(blocks_to_html(b->children, &contents, data->tight) == 0,
-            "error converting blocks to html");
-      mbstart = bformat(" start=\"%d\"", data->start);
-      bformata(html, "<%s%s>\n%s</%s>",
-               data->list_type == bullet ? "ul" : "ol",
-               data->start == 1 ? "" : (char*) mbstart->data,
-               contents->data,
-               data->list_type == bullet ? "ul" : "ol");
-      cr(html);
-      bdestroy(contents);
-      bdestroy(mbstart);
-      break;
-    case atx_header:
-    case setext_header:
-      check(inlines_to_html(b->inline_content, &contents) == 0,
-            "error converting inlines to html");
-      cr(html);
-      bformata(html, "<h%d>%s</h%d>",
-               b->attributes.header_level,
-               contents->data,
-               b->attributes.header_level);
-      cr(html);
-      bdestroy(contents);
-      break;
-    case indented_code:
-      escaped = escape_html(b->string_content, false);
-      cr(html);
-      bformata(html, "<pre><code>%s</code></pre>", escaped->data);
-      cr(html);
-      bdestroy(escaped);
-      break;
-    case fenced_code:
-      escaped = escape_html(b->string_content, false);
-      cr(html);
-      bformata(html, "<pre><code");
-      if (blength(b->attributes.fenced_code_data.info) > 0) {
-        escaped2 = escape_html(b->attributes.fenced_code_data.info, true);
-        info_words = bsplit(escaped2, ' ');
-        bformata(html, " class=\"language-%s\"", info_words->entry[0]->data);
-        bdestroy(escaped2);
-        bstrListDestroy(info_words);
-      }
-      bformata(html, ">%s</code></pre>", escaped->data);
-      cr(html);
-      bdestroy(escaped);
-      break;
-    case html_block:
-      bformata(html, "%s", b->string_content->data);
-      break;
-    case hrule:
-      bformata(html, "<hr />");
-      cr(html);
-      break;
-    case reference_def:
-      break;
-    default:
-      log_warn("block type %d not implemented\n", b->tag);
-      break;
-    }
-    b = b->next;
-  }
-  *result = html;
-  return 0;
- error:
-  return -1;
-}
-
-// Convert an inline list to HTML.  Returns 0 on success, and sets result.
-extern int inlines_to_html(inl* ils, bstring* result)
-{
-  bstring contents = NULL;
-  bstring html = blk2bstr("", 0);
-  bstring mbtitle, escaped, escaped2;
-
-  while(ils != NULL) {
-    switch(ils->tag) {
-    case str:
-      escaped = escape_html(ils->content.literal, false);
-      bformata(html, "%s", escaped->data);
-      bdestroy(escaped);
-      break;
-    case linebreak:
-      bformata(html, "<br />\n");
-      break;
-    case softbreak:
-      bformata(html, "\n");
-      break;
-    case code:
-      escaped = escape_html(ils->content.literal, false);
-      bformata(html, "<code>%s</code>", escaped->data);
-      bdestroy(escaped);
-      break;
-    case raw_html:
-    case entity:
-      bformata(html, "%s", ils->content.literal->data);
-      break;
-    case link:
-      check(inlines_to_html(ils->content.inlines, &contents) == 0,
-            "error converting inlines to html");
-      if (blength(ils->content.linkable.title) > 0) {
-        escaped = escape_html(ils->content.linkable.title, true);
-        mbtitle = bformat(" title=\"%s\"", escaped->data);
-        bdestroy(escaped);
-      } else {
-        mbtitle = blk2bstr("",0);
-      }
-      escaped = escape_html(ils->content.linkable.url, true);
-      bformata(html, "<a href=\"%s\"%s>%s</a>",
-               escaped->data,
-               mbtitle->data,
-               contents->data);
-      bdestroy(escaped);
-      bdestroy(mbtitle);
-      bdestroy(contents);
-      break;
-    case image:
-      check(inlines_to_html(ils->content.inlines, &contents) == 0,
-            "error converting inlines to html");
-      escaped  = escape_html(ils->content.linkable.url, true);
-      escaped2 = escape_html(contents, false);
-      bdestroy(contents);
-      bformata(html, "<img src=\"%s\" alt=\"%s\"",
-               escaped->data, escaped2->data);
-      bdestroy(escaped);
-      bdestroy(escaped2);
-      if (blength(ils->content.linkable.title) > 0) {
-        escaped = escape_html(ils->content.linkable.title, true);
-        bformata(html, " title=\"%s\"", escaped->data);
-        bdestroy(escaped);
-      }
-      bformata(html, " />");
-      break;
-    case strong:
-      check(inlines_to_html(ils->content.inlines, &contents) == 0,
-            "error converting inlines to html");
-      bformata(html, "<strong>%s</strong>", contents->data);
-      bdestroy(contents);
-      break;
-    case emph:
-      check(inlines_to_html(ils->content.inlines, &contents) == 0,
-            "error converting inlines to html");
-      bformata(html, "<em>%s</em>", contents->data);
-      bdestroy(contents);
-      break;
-    }
-    ils = ils->next;
-  }
-  *result = html;
-  return 0;
- error:
-  return -1;
-}
diff --git a/src/inlines.c b/src/inlines.c
index 4ff45ad..82c7219 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -6,9 +6,7 @@
 
 #include "stmd.h"
 #include "uthash.h"
-#include "debug.h"
 #include "scanners.h"
-#include "utf8.h"
 
 typedef struct Subject {
   const gh_buf   *buffer;
@@ -119,7 +117,7 @@ inline static inl* make_linkable(int t, inl* label, chunk url, chunk title)
 	e->tag = t;
 	e->content.linkable.label = label;
 	e->content.linkable.url   = chunk_to_cstr(&url);
-	e->content.linkable.title = chunk_to_cstr(&title);
+	e->content.linkable.title = url.len ? chunk_to_cstr(&title) : NULL;
 	e->next = NULL;
 	return e;
 }
diff --git a/src/main.c b/src/main.c
index 9e0a3c8..e1abedc 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,99 +1,77 @@
 #include <stdlib.h>
 #include <stdio.h>
-#include "bstrlib.h"
+#include <string.h>
 #include "stmd.h"
 #include "debug.h"
 
 void print_usage()
 {
-  printf("Usage:   stmd [FILE*]\n");
-  printf("Options: --help, -h    Print usage information\n");
-  printf("         --ast         Print AST instead of HTML\n");
-  printf("         --version     Print version\n");
+	printf("Usage:   stmd [FILE*]\n");
+	printf("Options: --help, -h    Print usage information\n");
+	printf("         --ast         Print AST instead of HTML\n");
+	printf("         --version     Print version\n");
 }
 
-int main(int argc, char *argv[]) {
-  int i;
-  bool ast = false;
-  int g = 0;
-  int numfps = 0;
-  int files[argc];
+static void print_document(block *document, bool ast)
+{
+	gh_buf html = GH_BUF_INIT;
+
+	if (ast) {
+		print_blocks(document, 0);
+	} else {
+		blocks_to_html(&html, document, false);
+		printf("%s", html.ptr);
+		gh_buf_free(&html);
+	}
+}
 
-  for (i=1; i < argc; i++) {
-    if (strcmp(argv[i], "--version") == 0) {
-      printf("stmd %s", VERSION);
-      printf(" - CommonMark converter (c) 2014 John MacFarlane\n");
-      exit(0);
-    } else if ((strcmp(argv[i], "--help") == 0) ||
-               (strcmp(argv[i], "-h") == 0)) {
-      print_usage();
-      exit(0);
-    } else if (strcmp(argv[i], "--ast") == 0) {
-      ast = true;
-    } else if (*argv[i] == '-') {
-      print_usage();
-      exit(1);
-    } else { // treat as file argument
-      files[g] = i;
-      g++;
-    }
-  }
+int main(int argc, char *argv[])
+{
+	int i, numfps = 0;
+	bool ast = false;
+	int files[argc];
+	block *document = NULL;
 
-  numfps = g;
-  bstring s = NULL;
-  bstring html;
-  g = 0;
-  block * cur = make_document();
-  int linenum = 1;
-  extern int errno;
-  FILE * fp = NULL;
+	for (i = 1; i < argc; i++) {
+		if (strcmp(argv[i], "--version") == 0) {
+			printf("stmd %s", VERSION);
+			printf(" - CommonMark converter (c) 2014 John MacFarlane\n");
+			exit(0);
+		} else if ((strcmp(argv[i], "--help") == 0) ||
+				(strcmp(argv[i], "-h") == 0)) {
+			print_usage();
+			exit(0);
+		} else if (strcmp(argv[i], "--ast") == 0) {
+			ast = true;
+		} else if (*argv[i] == '-') {
+			print_usage();
+			exit(1);
+		} else { // treat as file argument
+			files[numfps++] = i;
+		}
+	}
 
-  if (numfps == 0) {
-    // read from stdin
-    while ((s = bgets((bNgetc) fgetc, stdin, '\n'))) {
-      check(incorporate_line(s, linenum, &cur) == 0,
-          "error incorporating line %d", linenum);
-      bdestroy(s);
-      linenum++;
-    }
-  } else {
-    // iterate over input file pointers
-    for (g=0; g < numfps; g++) {
+	if (numfps == 0) {
+		document = stmd_parse_file(stdin);
+		print_document(document, ast);
+		free_blocks(document);
+	} else {
+		for (i = 0; i < numfps; i++) {
+			FILE *fp = fopen(argv[files[i]], "r");
 
-      fp = fopen(argv[files[g]], "r");
-      if (fp == NULL) {
-        fprintf(stderr, "Error opening file %s: %s\n",
-                argv[files[g]], strerror(errno));
-        exit(1);
-      }
+			if (fp == NULL) {
+				fprintf(stderr, "Error opening file %s: %s\n",
+						argv[files[i]], strerror(errno));
+				exit(1);
+			}
 
-      while ((s = bgets((bNgetc) fgetc, fp, '\n'))) {
-        check(incorporate_line(s, linenum, &cur) == 0,
-            "error incorporating line %d", linenum);
-        bdestroy(s);
-        linenum++;
-      }
-      fclose(fp);
-    }
-  }
+			document = stmd_parse_file(fp);
+			print_document(document, ast);
+			free_blocks(document);
+			fclose(fp);
+		}
+	}
 
-  while (cur != cur->top) {
-    finalize(cur, linenum);
-    cur = cur->parent;
-  }
-  check(cur == cur->top, "problems finalizing open containers");
-  finalize(cur, linenum);
-  process_inlines(cur, cur->attributes.refmap);
-  if (ast) {
-    print_blocks(cur, 0);
-  } else {
-    check(blocks_to_html(cur, &html, false) == 0, "could not format as HTML");
-    // printf("%s", html->data);
-    bdestroy(html);
-  }
-  free_blocks(cur);
-  return 0;
-error:
-  return -1;
+	return 0;
 }
 
diff --git a/src/print.c b/src/print.c
index a924870..3ebde16 100644
--- a/src/print.c
+++ b/src/print.c
@@ -1,168 +1,175 @@
 #include <stdlib.h>
 #include <stdio.h>
-#include "bstrlib.h"
+#include <string.h>
 #include "stmd.h"
 #include "debug.h"
 
-static bstring format_str(bstring s)
+static void print_str(const unsigned char *s, int len)
 {
-  int pos = 0;
-  int len = blength(s);
-  bstring result = bfromcstr("");
-  char c;
-  bformata(result, "\"");
-  while (pos < len) {
-    c = bchar(s, pos);
-    switch (c) {
-    case '\n':
-      bformata(result, "\\n");
-      break;
-    case '"':
-      bformata(result, "\\\"");
-      break;
-    case '\\':
-      bformata(result, "\\\\");
-      break;
-    default:
-      bformata(result, "%c", c);
-    }
-    pos++;
-  }
-  bformata(result, "\"");
-  return result;
+	int i;
+
+	if (len < 0)
+		len = strlen(s);
+
+	putchar('"');
+	for (i = 0; i < len; ++i) {
+		unsigned char c = s[i];
+
+		switch (c) {
+			case '\n':
+				printf("\\n");
+				break;
+			case '"':
+				printf("\\\"");
+				break;
+			case '\\':
+				printf("\\\\");
+				break;
+			default:
+				putchar((int)c);
+		}
+	}
+	putchar('"');
 }
 
 // Functions to pretty-print inline and block lists, for debugging.
 // Prettyprint an inline list, for debugging.
 extern void print_blocks(block* b, int indent)
 {
-  struct ListData * data;
-  while(b != NULL) {
-    // printf("%3d %3d %3d| ", b->start_line, b->start_column, b->end_line);
-    for (int i=0; i < indent; i++) {
-      putchar(' ');
-    }
-    switch(b->tag) {
-    case document:
-      printf("document\n");
-      print_blocks(b->children, indent + 2);
-      break;
-    case block_quote:
-      printf("block_quote\n");
-      print_blocks(b->children, indent + 2);
-      break;
-    case list_item:
-      data = &(b->attributes.list_data);
-      printf("list_item\n");
-      print_blocks(b->children, indent + 2);
-      break;
-    case list:
-      data = &(b->attributes.list_data);
-      if (data->list_type == ordered) {
-        printf("list (type=ordered tight=%s start=%d delim=%s)\n",
-               (data->tight ? "true" : "false"),
-               data->start,
-               (data->delimiter == parens ? "parens" : "period"));
-      } else {
-        printf("list (type=bullet tight=%s bullet_char=%c)\n",
-               (data->tight ? "true" : "false"),
-               data->bullet_char);
-      }
-      print_blocks(b->children, indent + 2);
-      break;
-    case atx_header:
-      printf("atx_header (level=%d)\n", b->attributes.header_level);
-      print_inlines(b->inline_content, indent + 2);
-      break;
-    case setext_header:
-      printf("setext_header (level=%d)\n", b->attributes.header_level);
-      print_inlines(b->inline_content, indent + 2);
-      break;
-    case paragraph:
-      printf("paragraph\n");
-      print_inlines(b->inline_content, indent + 2);
-      break;
-    case hrule:
-      printf("hrule\n");
-      break;
-    case indented_code:
-      printf("indented_code %s\n", format_str(b->string_content)->data);
-      break;
-    case fenced_code:
-      printf("fenced_code length=%d info=%s %s\n",
-             b->attributes.fenced_code_data.fence_length,
-             format_str(b->attributes.fenced_code_data.info)->data,
-             format_str(b->string_content)->data);
-      break;
-    case html_block:
-      printf("html_block %s\n", format_str(b->string_content)->data);
-      break;
-    case reference_def:
-      printf("reference_def\n");
-      break;
-    default:
-      log_warn("block type %d not implemented\n", b->tag);
-      break;
-    }
-    b = b->next;
-  }
+	struct ListData *data;
+
+	while(b != NULL) {
+		// printf("%3d %3d %3d| ", b->start_line, b->start_column, b->end_line);
+		for (int i=0; i < indent; i++) {
+			putchar(' ');
+		}
+
+		switch(b->tag) {
+		case document:
+			printf("document\n");
+			print_blocks(b->children, indent + 2);
+			break;
+		case block_quote:
+			printf("block_quote\n");
+			print_blocks(b->children, indent + 2);
+			break;
+		case list_item:
+			data = &(b->attributes.list_data);
+			printf("list_item\n");
+			print_blocks(b->children, indent + 2);
+			break;
+		case list:
+			data = &(b->attributes.list_data);
+			if (data->list_type == ordered) {
+				printf("list (type=ordered tight=%s start=%d delim=%s)\n",
+						(data->tight ? "true" : "false"),
+						data->start,
+						(data->delimiter == parens ? "parens" : "period"));
+			} else {
+				printf("list (type=bullet tight=%s bullet_char=%c)\n",
+						(data->tight ? "true" : "false"),
+						data->bullet_char);
+			}
+			print_blocks(b->children, indent + 2);
+			break;
+		case atx_header:
+			printf("atx_header (level=%d)\n", b->attributes.header_level);
+			print_inlines(b->inline_content, indent + 2);
+			break;
+		case setext_header:
+			printf("setext_header (level=%d)\n", b->attributes.header_level);
+			print_inlines(b->inline_content, indent + 2);
+			break;
+		case paragraph:
+			printf("paragraph\n");
+			print_inlines(b->inline_content, indent + 2);
+			break;
+		case hrule:
+			printf("hrule\n");
+			break;
+		case indented_code:
+			printf("indented_code ");
+			print_str(b->string_content.ptr, -1);
+			putchar('\n');
+			break;
+		case fenced_code:
+			printf("fenced_code length=%d info=",
+				b->attributes.fenced_code_data.fence_length);
+			print_str(b->attributes.fenced_code_data.info.ptr, -1);
+			putchar(' ');
+			print_str(b->string_content.ptr, -1);
+			putchar('\n');
+			break;
+		case html_block:
+			printf("html_block ");
+			print_str(b->string_content.ptr, -1);
+			putchar('\n');
+			break;
+		case reference_def:
+			printf("reference_def\n");
+			break;
+		default:
+			printf("# NOT IMPLEMENTED (%d)\n", b->tag);
+			break;
+		}
+		b = b->next;
+	}
 }
 
 // Prettyprint an inline list, for debugging.
 extern void print_inlines(inl* ils, int indent)
 {
-  while(ils != NULL) {
-    /*
-    // we add 11 extra spaces for the line/column info
-    for (int i=0; i < 11; i++) {
-      putchar(' ');
-    }
-    putchar('|');
-    putchar(' ');
-    */
-    for (int i=0; i < indent; i++) {
-      putchar(' ');
-    }
-    switch(ils->tag) {
-    case str:
-      printf("str %s\n", format_str(ils->content.literal)->data);
-      break;
-    case linebreak:
-      printf("linebreak\n");
-      break;
-    case softbreak:
-      printf("softbreak\n");
-      break;
-    case code:
-      printf("code %s\n", format_str(ils->content.literal)->data);
-      break;
-    case raw_html:
-      printf("html %s\n", format_str(ils->content.literal)->data);
-      break;
-    case entity:
-      printf("entity %s\n", format_str(ils->content.literal)->data);
-      break;
-    case link:
-      printf("link url=%s title=%s\n",
-             format_str(ils->content.linkable.url)->data,
-             format_str(ils->content.linkable.title)->data);
-      print_inlines(ils->content.linkable.label, indent + 2);
-      break;
-    case image:
-      printf("image url=%s title=%s\n",
-             format_str(ils->content.linkable.url)->data,
-             format_str(ils->content.linkable.title)->data);
-      print_inlines(ils->content.linkable.label, indent + 2);
-      break;
-    case strong:
-      printf("strong\n");
-      print_inlines(ils->content.linkable.label, indent + 2);
-      break;
-    case emph:
-      printf("emph\n");
-      print_inlines(ils->content.linkable.label, indent + 2);
-      break;
-    }
-    ils = ils->next;
-  }
+	while(ils != NULL) {
+		for (int i=0; i < indent; i++) {
+			putchar(' ');
+		}
+		switch(ils->tag) {
+		case str:
+			printf("str ");
+			print_str(ils->content.literal.data, ils->content.literal.len);
+			putchar('\n');
+			break;
+		case linebreak:
+			printf("linebreak\n");
+			break;
+		case softbreak:
+			printf("softbreak\n");
+			break;
+		case code:
+			printf("code ");
+			print_str(ils->content.literal.data, ils->content.literal.len);
+			putchar('\n');
+			break;
+		case raw_html:
+			printf("html ");
+			print_str(ils->content.literal.data, ils->content.literal.len);
+			putchar('\n');
+			break;
+		case entity:
+			printf("entity ");
+			print_str(ils->content.literal.data, ils->content.literal.len);
+			putchar('\n');
+			break;
+		case link:
+		case image:
+			printf("%s url=", ils->tag == link ? "link" : "image");
+			print_str(ils->content.linkable.url, -1);
+			if (ils->content.linkable.title) {
+				printf(" title=");
+				print_str(ils->content.linkable.title, -1);
+			}
+			putchar('\n');
+			print_inlines(ils->content.linkable.label, indent + 2);
+			break;
+		case strong:
+			printf("strong\n");
+			print_inlines(ils->content.linkable.label, indent + 2);
+			break;
+		case emph:
+			printf("emph\n");
+			print_inlines(ils->content.linkable.label, indent + 2);
+			break;
+		}
+		ils = ils->next;
+	}
 }
diff --git a/src/stmd.h b/src/stmd.h
index eb1b989..dc24235 100644
--- a/src/stmd.h
+++ b/src/stmd.h
@@ -105,19 +105,14 @@ extern block* add_child(block* parent,
                         int block_type, int start_line, int start_column);
 void free_blocks(block* e);
 
-block *stmd_parse_document(const char *buffer, size_t len);
-
-// FOR NOW:
-void process_inlines(block* cur, reference** refmap);
-void incorporate_line(gh_buf *ln, int line_number, block** curptr);
-void finalize(block* b, int line_number);
+extern block *stmd_parse_document(const unsigned char *buffer, size_t len);
+extern block *stmd_parse_file(FILE *f);
 
 void print_inlines(inl* ils, int indent);
 void print_blocks(block* blk, int indent);
 
-/* TODO */
-// int blocks_to_html(block* b, bstring* result, bool tight);
-// int inlines_to_html(inl* b, bstring* result);
+void blocks_to_html(gh_buf *html, block *b, bool tight);
+void inlines_to_html(gh_buf *html, inl *b);
 
 void utf8proc_case_fold(gh_buf *dest, const unsigned char *str, int len);
 
diff --git a/src/utf8.c b/src/utf8.c
index 1a5df9e..e3f8dd3 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -1,6 +1,8 @@
 #include <stdlib.h>
-#include "bstrlib.h"
-#include "debug.h"
+#include <stdint.h>
+#include <unistd.h>
+
+#include "stmd.h"
 
 static const int8_t utf8proc_utf8class[256] = {
 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
diff --git a/src/utf8.h b/src/utf8.h
deleted file mode 100644
index fe59a90..0000000
--- a/src/utf8.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#include <stdlib.h>
-#include "bstrlib.h"
-
-extern unsigned char * from_utf8(unsigned char * s, unsigned int *n);
-extern int to_utf8(unsigned int c, bstring dest);
-extern bstring case_fold(bstring source);
-- 
cgit v1.2.3


From 24248c0f1a6de6f229890c5c03aeff8738214fee Mon Sep 17 00:00:00 2001
From: Vicent Marti <tanoku@gmail.com>
Date: Tue, 2 Sep 2014 13:30:13 +0200
Subject: Rename inlines

---
 src/inlines.c | 50 +++++++++++++++++++++++++-------------------------
 src/print.c   | 22 +++++++++++-----------
 src/stmd.h    |  4 ++--
 3 files changed, 38 insertions(+), 38 deletions(-)

(limited to 'src/inlines.c')

diff --git a/src/inlines.c b/src/inlines.c
index 82c7219..b9ece0e 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -151,15 +151,15 @@ inline static inl* make_simple(int t)
 }
 
 // Macros for creating various kinds of inlines.
-#define make_str(s) make_literal(str, s)
-#define make_code(s) make_literal(code, s)
-#define make_raw_html(s) make_literal(raw_html, s)
-#define make_entity(s) make_literal(entity, s)
-#define make_linebreak() make_simple(linebreak)
-#define make_softbreak() make_simple(softbreak)
-#define make_link(label, url, title) make_linkable(link, label, url, title)
-#define make_emph(contents) make_inlines(emph, contents)
-#define make_strong(contents) make_inlines(strong, contents)
+#define make_str(s) make_literal(INL_STRING, s)
+#define make_code(s) make_literal(INL_CODE, s)
+#define make_raw_html(s) make_literal(INL_RAW_HTML, s)
+#define make_entity(s) make_literal(INL_ENTITY, s)
+#define make_linebreak() make_simple(INL_LINEBREAK)
+#define make_softbreak() make_simple(INL_SOFTBREAK)
+#define make_link(label, url, title) make_linkable(INL_LINK, label, url, title)
+#define make_emph(contents) make_inlines(INL_EMPH, contents)
+#define make_strong(contents) make_inlines(INL_STRONG, contents)
 
 // Free an inline list.
 extern void free_inlines(inl* e)
@@ -167,23 +167,23 @@ extern void free_inlines(inl* e)
 	inl * next;
 	while (e != NULL) {
 		switch (e->tag){
-			case str:
-			case raw_html:
-			case code:
-			case entity:
+			case INL_STRING:
+			case INL_RAW_HTML:
+			case INL_CODE:
+			case INL_ENTITY:
 				chunk_free(&e->content.literal);
 				break;
-			case linebreak:
-			case softbreak:
+			case INL_LINEBREAK:
+			case INL_SOFTBREAK:
 				break;
-			case link:
-			case image:
+			case INL_LINK:
+			case INL_IMAGE:
 				free(e->content.linkable.url);
 				free(e->content.linkable.title);
 				free_inlines(e->content.linkable.label);
 				break;
-			case emph:
-			case strong:
+			case INL_EMPH:
+			case INL_STRONG:
 				free_inlines(e->content.inlines);
 				break;
 			default:
@@ -454,7 +454,7 @@ static inl* handle_strong_emph(subject* subj, char c)
 				numdelims = scan_delims(subj, c, &can_open, &can_close);
 				if (numdelims >= 1 && can_close) {
 					subj->pos += 1;
-					first_head->tag = emph;
+					first_head->tag = INL_EMPH;
 					chunk_free(&first_head->content.literal);
 					first_head->content.inlines = first_head->next;
 					first_head->next = NULL;
@@ -471,7 +471,7 @@ static inl* handle_strong_emph(subject* subj, char c)
 				numdelims = scan_delims(subj, c, &can_open, &can_close);
 				if (numdelims >= 2 && can_close) {
 					subj->pos += 2;
-					first_head->tag = strong;
+					first_head->tag = INL_STRONG;
 					chunk_free(&first_head->content.literal);
 					first_head->content.inlines = first_head->next;
 					first_head->next = NULL;
@@ -502,10 +502,10 @@ static inl* handle_strong_emph(subject* subj, char c)
 					}
 					subj->pos += numdelims;
 					if (first_close) {
-						first_head->tag = first_close_delims == 1 ? strong : emph;
+						first_head->tag = first_close_delims == 1 ? INL_STRONG : INL_EMPH;
 						chunk_free(&first_head->content.literal);
 						first_head->content.inlines =
-							make_inlines(first_close_delims == 1 ? emph : strong,
+							make_inlines(first_close_delims == 1 ? INL_EMPH : INL_STRONG,
 									first_head->next);
 
 						il = first_head->next;
@@ -989,8 +989,8 @@ static int parse_inline(subject* subj, inl ** last)
 			advance(subj);
 			if (peek_char(subj) == '[') {
 				new = handle_left_bracket(subj);
-				if (new != NULL && new->tag == link) {
-					new->tag = image;
+				if (new != NULL && new->tag == INL_LINK) {
+					new->tag = INL_IMAGE;
 				} else {
 					new = append_inlines(make_str(chunk_literal("!")), new);
 				}
diff --git a/src/print.c b/src/print.c
index 3ebde16..0a87925 100644
--- a/src/print.c
+++ b/src/print.c
@@ -124,35 +124,35 @@ extern void print_inlines(inl* ils, int indent)
 			putchar(' ');
 		}
 		switch(ils->tag) {
-		case str:
+		case INL_STRING:
 			printf("str ");
 			print_str(ils->content.literal.data, ils->content.literal.len);
 			putchar('\n');
 			break;
-		case linebreak:
+		case INL_LINEBREAK:
 			printf("linebreak\n");
 			break;
-		case softbreak:
+		case INL_SOFTBREAK:
 			printf("softbreak\n");
 			break;
-		case code:
+		case INL_CODE:
 			printf("code ");
 			print_str(ils->content.literal.data, ils->content.literal.len);
 			putchar('\n');
 			break;
-		case raw_html:
+		case INL_RAW_HTML:
 			printf("html ");
 			print_str(ils->content.literal.data, ils->content.literal.len);
 			putchar('\n');
 			break;
-		case entity:
+		case INL_ENTITY:
 			printf("entity ");
 			print_str(ils->content.literal.data, ils->content.literal.len);
 			putchar('\n');
 			break;
-		case link:
-		case image:
-			printf("%s url=", ils->tag == link ? "link" : "image");
+		case INL_LINK:
+		case INL_IMAGE:
+			printf("%s url=", ils->tag == INL_LINK ? "link" : "image");
 			print_str(ils->content.linkable.url, -1);
 			if (ils->content.linkable.title) {
 				printf(" title=");
@@ -161,11 +161,11 @@ extern void print_inlines(inl* ils, int indent)
 			putchar('\n');
 			print_inlines(ils->content.linkable.label, indent + 2);
 			break;
-		case strong:
+		case INL_STRONG:
 			printf("strong\n");
 			print_inlines(ils->content.linkable.label, indent + 2);
 			break;
-		case emph:
+		case INL_EMPH:
 			printf("emph\n");
 			print_inlines(ils->content.linkable.label, indent + 2);
 			break;
diff --git a/src/stmd.h b/src/stmd.h
index dc24235..1e490d6 100644
--- a/src/stmd.h
+++ b/src/stmd.h
@@ -12,8 +12,8 @@ typedef struct {
 } chunk;
 
 typedef struct Inline {
-	enum { str, softbreak, linebreak, code, raw_html, entity,
-		emph, strong, link, image } tag;
+	enum { INL_STRING, INL_SOFTBREAK, INL_LINEBREAK, INL_CODE, INL_RAW_HTML, INL_ENTITY,
+		INL_EMPH, INL_STRONG, INL_LINK, INL_IMAGE } tag;
 	union {
 		chunk literal;
 		struct Inline *inlines;
-- 
cgit v1.2.3


From 7e12fdba0c9a444a3cfc29c520e2f2caa57a8232 Mon Sep 17 00:00:00 2001
From: Vicent Marti <tanoku@gmail.com>
Date: Tue, 2 Sep 2014 14:15:24 +0200
Subject: NO SEGFAULTS KTHX

---
 src/inlines.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'src/inlines.c')

diff --git a/src/inlines.c b/src/inlines.c
index b9ece0e..7b48ad9 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -84,7 +84,7 @@ extern reference* lookup_reference(reference** refmap, chunk *label)
 	if (refmap != NULL) {
 		HASH_FIND_STR(*refmap, (char*)norm, ref);
 	}
-	free(label);
+	free(norm);
 	return ref;
 }
 
@@ -262,7 +262,7 @@ inline static unsigned char *chunk_to_cstr(chunk *c)
 
 inline static chunk chunk_literal(const char *data)
 {
-	chunk c = {data, strlen(data), 0};
+	chunk c = {data, data ? strlen(data) : 0, 0};
 	return c;
 }
 
@@ -937,6 +937,7 @@ static int find_special_char(subject *subj)
 	while (n < size) {
 		if (strchr("\n\\`&_*[]<!", gh_buf_at(subj->buffer, n)))
 			return n;
+		n++;
 	}
 
 	return -1;
@@ -974,7 +975,9 @@ static int parse_inline(subject* subj, inl ** last)
 		case '_':
 			if (subj->pos > 0 && (isalnum(gh_buf_at(subj->buffer, subj->pos - 1)) ||
 						gh_buf_at(subj->buffer, subj->pos - 1) == '_')) {
-				goto text_literal;
+				new = make_str(chunk_literal("_"));
+				advance(subj);
+				break;
 			}
 
 			new = handle_strong_emph(subj, '_');
-- 
cgit v1.2.3


From a7314deae649646f1f7ce5ede972641b5b62538c Mon Sep 17 00:00:00 2001
From: Vicent Marti <tanoku@gmail.com>
Date: Wed, 3 Sep 2014 03:40:23 +0200
Subject: 338/103

---
 Makefile                  |   4 +-
 src/blocks.c              | 173 +++++++++++++++++-----------------
 src/buffer.c              |  26 ++++-
 src/buffer.h              |   2 +-
 src/html/houdini_href_e.c |  10 +-
 src/html/houdini_html_e.c |  10 +-
 src/html/html.c           |   4 +-
 src/inlines.c             | 235 ++++++++++++++++++----------------------------
 src/print.c               |   2 +-
 src/scanners.h            |  28 +++---
 src/scanners.re           |  85 +++++++----------
 src/stmd.h                |  16 ++--
 12 files changed, 261 insertions(+), 334 deletions(-)

(limited to 'src/inlines.c')

diff --git a/Makefile b/Makefile
index d14a928..89ec68c 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
-CFLAGS=-ggdb3 -O0 -Wall -Wextra -std=c99 -Isrc $(OPTFLAGS)
-LDFLAGS=-ggdb3 -O0 -Wall -Werror
+CFLAGS=-ggdb3 -O0 -Wall -Wextra -Wno-unused-variable -std=c99 -Isrc $(OPTFLAGS)
+LDFLAGS=-ggdb3 -O0 -Wall -Wno-unused-variable # -Werror
 SRCDIR=src
 DATADIR=data
 
diff --git a/src/blocks.c b/src/blocks.c
index 42f20db..94ff986 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -8,6 +8,8 @@
 #include "scanners.h"
 #include "uthash.h"
 
+#define peek_at(i, n) (i)->data[n]
+
 static void incorporate_line(gh_buf *ln, int line_number, block** curptr);
 static void finalize(block* b, int line_number);
 
@@ -27,7 +29,6 @@ static block* make_block(int tag, int start_line, int start_column)
 	e->top = NULL;
 	e->attributes.refmap = NULL;
 	gh_buf_init(&e->string_content, 32);
-	e->string_pos = 0;
 	e->inline_content = NULL;
 	e->next = NULL;
 	e->prev = NULL;
@@ -80,10 +81,10 @@ static inline bool accepts_lines(int block_type)
 			block_type == fenced_code);
 }
 
-static void add_line(block* block, gh_buf *ln, int offset)
+static void add_line(block* block, chunk *ch, int offset)
 {
 	assert(block->open);
-	gh_buf_put(&block->string_content, ln->ptr + offset, ln->size - offset);
+	gh_buf_put(&block->string_content, ch->data + offset, ch->len - offset);
 }
 
 static void remove_trailing_blank_lines(gh_buf *ln)
@@ -104,7 +105,7 @@ static void remove_trailing_blank_lines(gh_buf *ln)
 
 	i = gh_buf_strchr(ln, '\n', i);
 	if (i >= 0)
-		gh_buf_truncate(ln, i + 1);
+		gh_buf_truncate(ln, i);
 }
 
 // Check to see if a block ends with a blank line, descending
@@ -162,12 +163,12 @@ static void finalize(block* b, int line_number)
 	switch (b->tag) {
 		case paragraph:
 			pos = 0;
-			while (gh_buf_at(&b->string_content, b->string_pos) == '[' &&
-					(pos = parse_reference(&b->string_content, b->string_pos,
-										   b->top->attributes.refmap))) {
-				b->string_pos = pos;
+			while (gh_buf_at(&b->string_content, 0) == '[' &&
+					(pos = parse_reference(&b->string_content, b->top->attributes.refmap))) {
+
+				gh_buf_drop(&b->string_content, pos);
 			}
-			if (is_blank(&b->string_content, b->string_pos)) {
+			if (is_blank(&b->string_content, 0)) {
 				b->tag = reference_def;
 			}
 			break;
@@ -179,14 +180,16 @@ static void finalize(block* b, int line_number)
 
 		case fenced_code:
 			// first line of contents becomes info
-			firstlinelen = gh_buf_strchr(&b->string_content, '\n', b->string_pos);
+			firstlinelen = gh_buf_strchr(&b->string_content, '\n', 0);
+
+			gh_buf_init(&b->attributes.fenced_code_data.info, 0);
 			gh_buf_set(
 				&b->attributes.fenced_code_data.info,
-				b->string_content.ptr + b->string_pos,
+				b->string_content.ptr,
 				firstlinelen
 			);
 
-			b->string_pos = firstlinelen + 1;
+			gh_buf_drop(&b->string_content, firstlinelen + 1);
 
 			gh_buf_trim(&b->attributes.fenced_code_data.info);
 			unescape_buffer(&b->attributes.fenced_code_data.info);
@@ -281,7 +284,7 @@ void process_inlines(block* cur, reference** refmap)
 		case paragraph:
 		case atx_header:
 		case setext_header:
-			cur->inline_content = parse_inlines(&cur->string_content, cur->string_pos, refmap);
+			cur->inline_content = parse_inlines(&cur->string_content, refmap);
 			// MEM
 			// gh_buf_free(&cur->string_content);
 			break;
@@ -300,19 +303,18 @@ void process_inlines(block* cur, reference** refmap)
 // Attempts to parse a list item marker (bullet or enumerated).
 // On success, returns length of the marker, and populates
 // data with the details.  On failure, returns 0.
-static int parse_list_marker(gh_buf *ln, int pos,
-		struct ListData ** dataptr)
+static int parse_list_marker(chunk *input, int pos, struct ListData ** dataptr)
 {
-	char c;
+	unsigned char c;
 	int startpos;
 	struct ListData * data;
 
 	startpos = pos;
-	c = gh_buf_at(ln, pos);
+	c = peek_at(input, pos);
 
-	if ((c == '*' || c == '-' || c == '+') && !scan_hrule(ln, pos)) {
+	if ((c == '*' || c == '-' || c == '+') && !scan_hrule(input, pos)) {
 		pos++;
-		if (!isspace(gh_buf_at(ln, pos))) {
+		if (!isspace(peek_at(input, pos))) {
 			return 0;
 		}
 		data = malloc(sizeof(struct ListData));
@@ -327,14 +329,14 @@ static int parse_list_marker(gh_buf *ln, int pos,
 		int start = 0;
 
 		do {
-			start = (10 * start) + (gh_buf_at(ln, pos) - '0');
+			start = (10 * start) + (peek_at(input, pos) - '0');
 			pos++;
-		} while (isdigit(gh_buf_at(ln, pos)));
+		} while (isdigit(peek_at(input, pos)));
 
-		c = gh_buf_at(ln, pos);
+		c = peek_at(input, pos);
 		if (c == '.' || c == ')') {
 			pos++;
-			if (!isspace(gh_buf_at(ln, pos))) {
+			if (!isspace(peek_at(input, pos))) {
 				return 0;
 			}
 			data = malloc(sizeof(struct ListData));
@@ -449,8 +451,26 @@ extern block *stmd_parse_document(const unsigned char *buffer, size_t len)
 	return finalize_document(document, linenum);
 }
 
+static void chop_trailing_hashtags(chunk *ch)
+{
+	int n;
+
+	chunk_rtrim(ch);
+	n = ch->len - 1;
+
+	// if string ends in #s, remove these:
+	while (n >= 0 && peek_at(ch, n) == '#')
+		n--;
+
+	// the last # was escaped, so we include it.
+	if (n >= 0 && peek_at(ch, n) == '\\')
+		n++;
+
+	ch->len = n + 1;
+}
+
 // Process one line at a time, modifying a block.
-static void incorporate_line(gh_buf *ln, int line_number, block** curptr)
+static void incorporate_line(gh_buf *line, int line_number, block** curptr)
 {
 	block* last_matched_container;
 	int offset = 0;
@@ -464,6 +484,10 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr)
 	bool blank = false;
 	int first_nonspace;
 	int indent;
+	chunk input;
+
+	input.data = line->ptr;
+	input.len = line->size;
 
 	// container starts at the document root.
 	container = cur->top;
@@ -475,21 +499,19 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr)
 		container = container->last_child;
 
 		first_nonspace = offset;
-		while (gh_buf_at(ln, first_nonspace) == ' ') {
+		while (peek_at(&input, first_nonspace) == ' ') {
 			first_nonspace++;
 		}
 
 		indent = first_nonspace - offset;
-		blank = gh_buf_at(ln, first_nonspace) == '\n';
+		blank = peek_at(&input, first_nonspace) == '\n';
 
 		if (container->tag == block_quote) {
-
-			matched = indent <= 3 && gh_buf_at(ln, first_nonspace) == '>';
+			matched = indent <= 3 && peek_at(&input, first_nonspace) == '>';
 			if (matched) {
 				offset = first_nonspace + 1;
-				if (gh_buf_at(ln, offset) == ' ') {
+				if (peek_at(&input, offset) == ' ')
 					offset++;
-				}
 			} else {
 				all_matched = false;
 			}
@@ -526,7 +548,7 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr)
 
 			// skip optional spaces of fence offset
 			i = container->attributes.fenced_code_data.fence_offset;
-			while (i > 0 && gh_buf_at(ln, offset) == ' ') {
+			while (i > 0 && peek_at(&input, offset) == ' ') {
 				offset++;
 				i--;
 			}
@@ -564,15 +586,13 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr)
 			container->tag != html_block) {
 
 		first_nonspace = offset;
-		while (gh_buf_at(ln, first_nonspace) == ' ') {
+		while (peek_at(&input, first_nonspace) == ' ')
 			first_nonspace++;
-		}
 
 		indent = first_nonspace - offset;
-		blank = gh_buf_at(ln, first_nonspace) == '\n';
+		blank = peek_at(&input, first_nonspace) == '\n';
 
 		if (indent >= CODE_INDENT) {
-
 			if (cur->tag != paragraph && !blank) {
 				offset += CODE_INDENT;
 				container = add_child(container, indented_code, line_number, offset + 1);
@@ -580,76 +600,70 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr)
 				break;
 			}
 
-		} else if (gh_buf_at(ln, first_nonspace) == '>') {
+		} else if (peek_at(&input, first_nonspace) == '>') {
 
 			offset = first_nonspace + 1;
 			// optional following character
-			if (gh_buf_at(ln, offset) == ' ') {
+			if (peek_at(&input, offset) == ' ')
 				offset++;
-			}
 			container = add_child(container, block_quote, line_number, offset + 1);
 
-		} else if ((matched = scan_atx_header_start(ln, first_nonspace))) {
+		} else if ((matched = scan_atx_header_start(&input, first_nonspace))) {
 
 			offset = first_nonspace + matched;
 			container = add_child(container, atx_header, line_number, offset + 1);
 
-			int hashpos = gh_buf_strchr(ln, '#', first_nonspace);
-			assert(hashpos >= 0);
-
+			int hashpos = chunk_strchr(&input, '#', first_nonspace);
 			int level = 0;
-			while (gh_buf_at(ln, hashpos) == '#') {
+
+			while (peek_at(&input, hashpos) == '#') {
 				level++;
 				hashpos++;
 			}
 			container->attributes.header_level = level;
 
-		} else if ((matched = scan_open_code_fence(ln, first_nonspace))) {
+		} else if ((matched = scan_open_code_fence(&input, first_nonspace))) {
 
-			container = add_child(container, fenced_code, line_number,
-					first_nonspace + 1);
-			container->attributes.fenced_code_data.fence_char = gh_buf_at(ln,
-					first_nonspace);
+			container = add_child(container, fenced_code, line_number, first_nonspace + 1);
+			container->attributes.fenced_code_data.fence_char = peek_at(&input, first_nonspace);
 			container->attributes.fenced_code_data.fence_length = matched;
-			container->attributes.fenced_code_data.fence_offset =
-				first_nonspace - offset;
+			container->attributes.fenced_code_data.fence_offset = first_nonspace - offset;
 			offset = first_nonspace + matched;
 
-		} else if ((matched = scan_html_block_tag(ln, first_nonspace))) {
+		} else if ((matched = scan_html_block_tag(&input, first_nonspace))) {
 
-			container = add_child(container, html_block, line_number,
-					first_nonspace + 1);
+			container = add_child(container, html_block, line_number, first_nonspace + 1);
 			// note, we don't adjust offset because the tag is part of the text
 
 		} else if (container->tag == paragraph &&
-				(lev = scan_setext_header_line(ln, first_nonspace)) &&
+				(lev = scan_setext_header_line(&input, first_nonspace)) &&
 				// check that there is only one line in the paragraph:
 				gh_buf_strrchr(&container->string_content, '\n',
 					gh_buf_len(&container->string_content) - 2) < 0) {
 
 			container->tag = setext_header;
 			container->attributes.header_level = lev;
-			offset = gh_buf_len(ln) - 1;
+			offset = input.len - 1;
 
 		} else if (!(container->tag == paragraph && !all_matched) &&
-				(matched = scan_hrule(ln, first_nonspace))) {
+				(matched = scan_hrule(&input, first_nonspace))) {
 
 			// it's only now that we know the line is not part of a setext header:
 			container = add_child(container, hrule, line_number, first_nonspace + 1);
 			finalize(container, line_number);
 			container = container->parent;
-			offset = gh_buf_len(ln) - 1;
+			offset = input.len - 1;
 
-		} else if ((matched = parse_list_marker(ln, first_nonspace, &data))) {
+		} else if ((matched = parse_list_marker(&input, first_nonspace, &data))) {
 
 			// compute padding:
 			offset = first_nonspace + matched;
 			i = 0;
-			while (i <= 5 && gh_buf_at(ln, offset + i) == ' ') {
+			while (i <= 5 && peek_at(&input, offset + i) == ' ') {
 				i++;
 			}
 			// i = number of spaces after marker, up to 5
-			if (i >= 5 || i < 1 || gh_buf_at(ln, offset) == '\n') {
+			if (i >= 5 || i < 1 || peek_at(&input, offset) == '\n') {
 				data->padding = matched + 1;
 				if (i > 0) {
 					offset += 1;
@@ -674,6 +688,7 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr)
 			// add the list item
 			container = add_child(container, list_item, line_number,
 					first_nonspace + 1);
+			/* TODO: static */
 			container->attributes.list_data = *data;
 			free(data);
 
@@ -691,12 +706,11 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr)
 	// appropriate container.
 
 	first_nonspace = offset;
-	while (gh_buf_at(ln, first_nonspace) == ' ') {
+	while (peek_at(&input, first_nonspace) == ' ')
 		first_nonspace++;
-	}
 
 	indent = first_nonspace - offset;
-	blank = gh_buf_at(ln, first_nonspace) == '\n';
+	blank = peek_at(&input, first_nonspace) == '\n';
 
 	// block quote lines are never blank as they start with >
 	// and we don't count blanks in fenced code for purposes of tight/loose
@@ -721,13 +735,12 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr)
 			cur->tag == paragraph &&
 			gh_buf_len(&cur->string_content) > 0) {
 
-		add_line(cur, ln, offset);
+		add_line(cur, &input, offset);
 
 	} else { // not a lazy continuation
 
 		// finalize any blocks that were not matched and set cur to container:
 		while (cur != last_matched_container) {
-
 			finalize(cur, line_number);
 			cur = cur->parent;
 			assert(cur != NULL);
@@ -735,58 +748,46 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr)
 
 		if (container->tag == indented_code) {
 
-			add_line(container, ln, offset);
+			add_line(container, &input, offset);
 
 		} else if (container->tag == fenced_code) {
 
 			matched = (indent <= 3
-					&& gh_buf_at(ln, first_nonspace) == container->attributes.fenced_code_data.fence_char)
-				&& scan_close_code_fence(ln, first_nonspace,
+					&& peek_at(&input, first_nonspace) == container->attributes.fenced_code_data.fence_char)
+				&& scan_close_code_fence(&input, first_nonspace,
 						container->attributes.fenced_code_data.fence_length);
 			if (matched) {
 				// if closing fence, don't add line to container; instead, close it:
 				finalize(container, line_number);
 				container = container->parent; // back up to parent
 			} else {
-				add_line(container, ln, offset);
+				add_line(container, &input, offset);
 			}
 
 		} else if (container->tag == html_block) {
 
-			add_line(container, ln, offset);
+			add_line(container, &input, offset);
 
 		} else if (blank) {
 
 			// ??? do nothing
 
 		} else if (container->tag == atx_header) {
-			// chop off trailing ###s...use a scanner?
-			gh_buf_trim(ln);
-			int p = gh_buf_len(ln) - 1;
-
-			// if string ends in #s, remove these:
-			while (gh_buf_at(ln, p) == '#') {
-				p--;
-			}
-			if (gh_buf_at(ln, p) == '\\') {
-				// the last # was escaped, so we include it.
-				p++;
-			}
 
-			gh_buf_truncate(ln, p + 1);
-			add_line(container, ln, first_nonspace);
+			chop_trailing_hashtags(&input);
+			add_line(container, &input, first_nonspace);
 			finalize(container, line_number);
 			container = container->parent;
 
 		} else if (accepts_lines(container->tag)) {
 
-			add_line(container, ln, first_nonspace);
+			add_line(container, &input, first_nonspace);
 
 		} else if (container->tag != hrule && container->tag != setext_header) {
 
 			// create paragraph container for line
 			container = add_child(container, paragraph, line_number, first_nonspace + 1);
-			add_line(container, ln, first_nonspace);
+			add_line(container, &input, first_nonspace);
 
 		} else {
 			assert(false);
diff --git a/src/buffer.c b/src/buffer.c
index cfc6a7e..dc4a405 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -95,7 +95,7 @@ void gh_buf_clear(gh_buf *buf)
 
 int gh_buf_set(gh_buf *buf, const unsigned char *data, int len)
 {
-	if (len == 0 || data == NULL) {
+	if (len <= 0 || data == NULL) {
 		gh_buf_clear(buf);
 	} else {
 		if (data != buf->ptr) {
@@ -125,6 +125,9 @@ int gh_buf_putc(gh_buf *buf, int c)
 
 int gh_buf_put(gh_buf *buf, const unsigned char *data, int len)
 {
+	if (len <= 0)
+		return 0;
+
 	ENSURE_SIZE(buf, buf->size + len + 1);
 	memmove(buf->ptr + buf->size, data, len);
 	buf->size += len;
@@ -272,15 +275,28 @@ void gh_buf_truncate(gh_buf *buf, int len)
 	}
 }
 
+void gh_buf_drop(gh_buf *buf, int n)
+{
+	if (n > 0) {
+		buf->size = buf->size - n;
+		if (buf->size)
+			memmove(buf->ptr, buf->ptr + n, buf->size);
+
+		buf->ptr[buf->size] = '\0';
+	}
+}
+
 void gh_buf_trim(gh_buf *buf)
 {
-	/* TODO: leading whitespace? */
-	/*
+	int i = 0;
+
+	if (!buf->size)
+		return;
+
 	while (i < buf->size && isspace(buf->ptr[i]))
 		i++;
 
-	gh_buf_truncate(buf, i);
-	*/
+	gh_buf_drop(buf, i);
 
 	/* rtrim */
 	while (buf->size > 0) {
diff --git a/src/buffer.h b/src/buffer.h
index 422ef02..0d5143e 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -105,8 +105,8 @@ extern void gh_buf_clear(gh_buf *buf);
 
 int gh_buf_strchr(const gh_buf *buf, int c, int pos);
 int gh_buf_strrchr(const gh_buf *buf, int c, int pos);
+void gh_buf_drop(gh_buf *buf, int n);
 void gh_buf_truncate(gh_buf *buf, int len);
-void gh_buf_ltruncate(gh_buf *buf, int len);
 void gh_buf_trim(gh_buf *buf);
 
 #endif
diff --git a/src/html/houdini_href_e.c b/src/html/houdini_href_e.c
index 59fe850..b2a7d79 100644
--- a/src/html/houdini_href_e.c
+++ b/src/html/houdini_href_e.c
@@ -62,16 +62,8 @@ houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size)
 		while (i < size && HREF_SAFE[src[i]] != 0)
 			i++;
 
-		if (likely(i > org)) {
-			if (unlikely(org == 0)) {
-				if (i >= size)
-					return 0;
-
-				gh_buf_grow(ob, HOUDINI_ESCAPED_SIZE(size));
-			}
-
+		if (likely(i > org))
 			gh_buf_put(ob, src + org, i - org);
-		}
 
 		/* escaping */
 		if (i >= size)
diff --git a/src/html/houdini_html_e.c b/src/html/houdini_html_e.c
index 316c5ce..5cdd3dd 100644
--- a/src/html/houdini_html_e.c
+++ b/src/html/houdini_html_e.c
@@ -54,16 +54,8 @@ houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure)
 		while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
 			i++;
 
-		if (i > org) {
-			if (unlikely(org == 0)) {
-				if (i >= size)
-					return 0;
-
-				gh_buf_grow(ob, HOUDINI_ESCAPED_SIZE(size));
-			}
-
+		if (i > org)
 			gh_buf_put(ob, src + org, i - org);
-		}
 
 		/* escaping */
 		if (unlikely(i >= size))
diff --git a/src/html/html.c b/src/html/html.c
index 2f160ca..27ffe58 100644
--- a/src/html/html.c
+++ b/src/html/html.c
@@ -68,7 +68,7 @@ void blocks_to_html(gh_buf *html, block *b, bool tight)
 				cr(html);
 				gh_buf_puts(html, "<li>");
 				blocks_to_html(html, b->children, tight);
-				gh_buf_trim(html);
+				gh_buf_trim(html); /* TODO: rtrim */
 				gh_buf_puts(html, "</li>");
 				cr(html);
 				break;
@@ -106,7 +106,7 @@ void blocks_to_html(gh_buf *html, block *b, bool tight)
 				cr(html);
 				gh_buf_puts(html, "<pre><code>");
 				escape_html(html, b->string_content.ptr, b->string_content.size);
-				gh_buf_puts(html, "</pre></code>");
+				gh_buf_puts(html, "</code></pre>");
 				cr(html);
 				break;
 
diff --git a/src/inlines.c b/src/inlines.c
index 7b48ad9..ef27a24 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -9,10 +9,10 @@
 #include "scanners.h"
 
 typedef struct Subject {
-  const gh_buf   *buffer;
-  int            pos;
-  reference**    reference_map;
-  int            label_nestlevel;
+	chunk input;
+	int pos;
+	int            label_nestlevel;
+	reference**    reference_map;
 } subject;
 
 reference* lookup_reference(reference** refmap, chunk *label);
@@ -27,12 +27,16 @@ inline static void chunk_trim(chunk *c);
 
 inline static chunk chunk_literal(const char *data);
 inline static chunk chunk_buf_detach(gh_buf *buf);
-inline static chunk chunk_buf(const gh_buf *buf, int pos, int len);
+inline static chunk chunk_dup(const chunk *ch, int pos, int len);
 
 static inl *parse_chunk_inlines(chunk *chunk, reference** refmap);
 static inl *parse_inlines_while(subject* subj, int (*f)(subject*));
 static int parse_inline(subject* subj, inl ** last);
 
+static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap);
+static void subject_from_buf(subject *e, gh_buf *buffer, reference** refmap);
+static int subject_find_special_char(subject *subj);
+
 extern void free_reference(reference *ref) {
 	free(ref->label);
 	free(ref->url);
@@ -101,10 +105,12 @@ extern reference* make_reference(chunk *label, chunk *url, chunk *title)
 extern void add_reference(reference** refmap, reference* ref)
 {
 	reference * t = NULL;
-	HASH_FIND(hh, *refmap, (char*)ref->label, (unsigned)strlen(ref->label), t);
+	const char *label = (const char *)ref->label;
+
+	HASH_FIND(hh, *refmap, label, strlen(label), t);
 
 	if (t == NULL) {
-		HASH_ADD_KEYPTR(hh, *refmap, (char*)ref->label, (unsigned)strlen(ref->label), ref);
+		HASH_ADD_KEYPTR(hh, *refmap, label, strlen(label), ref);
 	} else {
 		free_reference(ref);  // we free this now since it won't be in the refmap
 	}
@@ -210,87 +216,49 @@ inline static inl* append_inlines(inl* a, inl* b)
 	return a;
 }
 
-// Make a 'subject' from an input string.
-static void init_subject(subject *e, gh_buf *buffer, int input_pos, reference** refmap)
+static void subject_from_buf(subject *e, gh_buf *buffer, reference** refmap)
 {
-	e->buffer = buffer;
-	e->pos = input_pos;
+	e->input.data = buffer->ptr;
+	e->input.len = buffer->size;
+	e->input.alloc = 0;
+	e->pos = 0;
 	e->label_nestlevel = 0;
 	e->reference_map = refmap;
-}
-
-inline static int isbacktick(int c)
-{
-	return (c == '`');
-}
-
-inline static void chunk_free(chunk *c)
-{
-	if (c->alloc)
-		free((char *)c->data);
-
-	c->data = NULL;
-	c->alloc = 0;
-	c->len = 0;
-}
-
-inline static void chunk_trim(chunk *c)
-{
-	while (c->len && isspace(c->data[0])) {
-		c->data++;
-		c->len--;
-	}
-
-	while (c->len > 0) {
-		if (!isspace(c->data[c->len - 1]))
-			break;
 
-		c->len--;
-	}
+	chunk_rtrim(&e->input);
 }
 
-inline static unsigned char *chunk_to_cstr(chunk *c)
+static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap)
 {
-	unsigned char *str;
-
-	str = malloc(c->len + 1);
-	memcpy(str, c->data, c->len);
-	str[c->len] = 0;
+	e->input.data = chunk->data;
+	e->input.len = chunk->len;
+	e->input.alloc = 0;
+	e->pos = 0;
+	e->label_nestlevel = 0;
+	e->reference_map = refmap;
 
-	return str;
+	chunk_rtrim(&e->input);
 }
 
-inline static chunk chunk_literal(const char *data)
+inline static int isbacktick(int c)
 {
-	chunk c = {data, data ? strlen(data) : 0, 0};
-	return c;
+	return (c == '`');
 }
 
-inline static chunk chunk_buf(const gh_buf *buf, int pos, int len)
+static inline unsigned char peek_char(subject *subj)
 {
-	chunk c = {buf->ptr + pos, len, 0};
-	return c;
+	return (subj->pos < subj->input.len) ? subj->input.data[subj->pos] : 0;
 }
 
-inline static chunk chunk_buf_detach(gh_buf *buf)
+static inline unsigned char peek_at(subject *subj, int pos)
 {
-	chunk c;
-
-	c.len = buf->size;
-	c.data = gh_buf_detach(buf);
-	c.alloc = 1;
-
-	return c;
+	return subj->input.data[pos];
 }
 
-// Return the next character in the subject, without advancing.
-// Return 0 if at the end of the subject.
-#define peek_char(subj) gh_buf_at((subj)->buffer, (subj)->pos)
-
 // Return true if there are more characters in the subject.
 inline static int is_eof(subject* subj)
 {
-	return (subj->pos >= gh_buf_len(subj->buffer));
+	return (subj->pos >= subj->input.len);
 }
 
 // Advance the subject.  Doesn't check for eof.
@@ -308,7 +276,7 @@ inline static chunk take_while(subject* subj, int (*f)(int))
 		len++;
 	}
 
-	return chunk_buf(subj->buffer, startpos, len);
+	return chunk_dup(&subj->input, startpos, len);
 }
 
 // Try to process a backtick code span that began with a
@@ -388,7 +356,7 @@ static inl* handle_backticks(subject *subj)
 	} else {
 		gh_buf buf = GH_BUF_INIT;
 
-		gh_buf_set(&buf, subj->buffer->ptr + startpos, endpos - startpos - openticks.len);
+		gh_buf_set(&buf, subj->input.data + startpos, endpos - startpos - openticks.len);
 		gh_buf_trim(&buf);
 		normalize_whitespace(&buf);
 
@@ -404,7 +372,7 @@ static int scan_delims(subject* subj, char c, bool * can_open, bool * can_close)
 	char char_before, char_after;
 	int startpos = subj->pos;
 
-	char_before = subj->pos == 0 ? '\n' : gh_buf_at(subj->buffer, subj->pos - 1);
+	char_before = subj->pos == 0 ? '\n' : peek_at(subj, subj->pos - 1);
 	while (peek_char(subj) == c) {
 		numdelims++;
 		advance(subj);
@@ -439,7 +407,7 @@ static inl* handle_strong_emph(subject* subj, char c)
 	numdelims = scan_delims(subj, c, &can_open, &can_close);
 	subj->pos += numdelims;
 
-	new = make_str(chunk_buf(subj->buffer, subj->pos - numdelims, numdelims));
+	new = make_str(chunk_dup(&subj->input, subj->pos - numdelims, numdelims));
 	*last = new;
 	first_head = new;
 	result = new;
@@ -488,7 +456,7 @@ static inl* handle_strong_emph(subject* subj, char c)
 				numdelims = scan_delims(subj, c, &can_open, &can_close);
 				if (can_close && numdelims >= 1 && numdelims <= 3 &&
 						numdelims != first_close_delims) {
-					new = make_str(chunk_buf(subj->buffer, subj->pos, numdelims));
+					new = make_str(chunk_dup(&subj->input, subj->pos, numdelims));
 					append_inlines(*last, new);
 					*last = new;
 					if (first_close_delims == 1 && numdelims > 2) {
@@ -554,7 +522,7 @@ static inl* handle_backslash(subject *subj)
 	unsigned char nextchar = peek_char(subj);
 	if (ispunct(nextchar)) {  // only ascii symbols and newline can be escaped
 		advance(subj);
-		return make_str(chunk_buf(subj->buffer, subj->pos - 1, 1));
+		return make_str(chunk_dup(&subj->input, subj->pos - 1, 1));
 	} else if (nextchar == '\n') {
 		advance(subj);
 		return make_linebreak();
@@ -569,9 +537,9 @@ static inl* handle_entity(subject* subj)
 {
 	int match;
 	inl *result;
-	match = scan_entity(subj->buffer, subj->pos);
+	match = scan_entity(&subj->input, subj->pos);
 	if (match) {
-		result = make_entity(chunk_buf(subj->buffer, subj->pos, match));
+		result = make_entity(chunk_dup(&subj->input, subj->pos, match));
 		subj->pos += match;
 	} else {
 		advance(subj);
@@ -584,15 +552,13 @@ static inl* handle_entity(subject* subj)
 // Returns an inline sequence consisting of str and entity elements.
 static inl *make_str_with_entities(chunk *content)
 {
-	inl * result = NULL;
-	inl * new;
+	inl *result = NULL;
+	inl *new;
 	int searchpos;
 	char c;
 	subject subj;
-	gh_buf content_buf = GH_BUF_INIT;
 
-	gh_buf_set(&content_buf, content->data, content->len);
-	init_subject(&subj, &content_buf, 0, NULL);
+	subject_from_chunk(&subj, content, NULL);
 
 	while ((c = peek_char(&subj))) {
 		switch (c) {
@@ -600,18 +566,13 @@ static inl *make_str_with_entities(chunk *content)
 				new = handle_entity(&subj);
 				break;
 			default:
-				searchpos = gh_buf_strchr(subj.buffer, '&', subj.pos);
-				if (searchpos < 0) {
-					searchpos = gh_buf_len(subj.buffer);
-				}
-
-				new = make_str(chunk_buf(subj.buffer, subj.pos, searchpos - subj.pos));
+				searchpos = chunk_strchr(&subj.input, '&', subj.pos);
+				new = make_str(chunk_dup(&subj.input, subj.pos, searchpos - subj.pos));
 				subj.pos = searchpos;
 		}
 		result = append_inlines(result, new);
 	}
 
-	gh_buf_free(&content_buf);
 	return result;
 }
 
@@ -678,9 +639,9 @@ static inl* handle_pointy_brace(subject* subj)
 	advance(subj);  // advance past first <
 
 	// first try to match a URL autolink
-	matchlen = scan_autolink_uri(subj->buffer, subj->pos);
+	matchlen = scan_autolink_uri(&subj->input, subj->pos);
 	if (matchlen > 0) {
-		contents = chunk_buf(subj->buffer, subj->pos, matchlen - 1);
+		contents = chunk_dup(&subj->input, subj->pos, matchlen - 1);
 		subj->pos += matchlen;
 
 		return make_link(
@@ -691,11 +652,11 @@ static inl* handle_pointy_brace(subject* subj)
 	}
 
 	// next try to match an email autolink
-	matchlen = scan_autolink_email(subj->buffer, subj->pos);
+	matchlen = scan_autolink_email(&subj->input, subj->pos);
 	if (matchlen > 0) {
 		gh_buf mail_url = GH_BUF_INIT;
 
-		contents = chunk_buf(subj->buffer, subj->pos, matchlen - 1);
+		contents = chunk_dup(&subj->input, subj->pos, matchlen - 1);
 		subj->pos += matchlen;
 
 		gh_buf_puts(&mail_url, "mailto:");
@@ -709,9 +670,9 @@ static inl* handle_pointy_brace(subject* subj)
 	}
 
 	// finally, try to match an html tag
-	matchlen = scan_html_tag(subj->buffer, subj->pos);
+	matchlen = scan_html_tag(&subj->input, subj->pos);
 	if (matchlen > 0) {
-		contents = chunk_buf(subj->buffer, subj->pos - 1, matchlen + 1);
+		contents = chunk_dup(&subj->input, subj->pos - 1, matchlen + 1);
 		subj->pos += matchlen;
 		return make_raw_html(contents);
 	}
@@ -776,12 +737,7 @@ static int link_label(subject* subj, chunk *raw_label)
 		}
 	}
 	if (c == ']') {
-		*raw_label = chunk_buf(
-			subj->buffer,
-			startpos + 1,
-			subj->pos - (startpos + 1)
-		);
-
+		*raw_label = chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1));
 		subj->label_nestlevel = 0;
 		advance(subj);  // advance past ]
 		return 1;
@@ -813,25 +769,25 @@ static inl* handle_left_bracket(subject* subj)
 
 	if (found_label) {
 		if (peek_char(subj) == '(' &&
-				((sps = scan_spacechars(subj->buffer, subj->pos + 1)) > -1) &&
-				((n = scan_link_url(subj->buffer, subj->pos + 1 + sps)) > -1)) {
+				((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) &&
+				((n = scan_link_url(&subj->input, subj->pos + 1 + sps)) > -1)) {
 
 			// try to parse an explicit link:
 			starturl = subj->pos + 1 + sps; // after (
 			endurl = starturl + n;
-			starttitle = endurl + scan_spacechars(subj->buffer, endurl);
+			starttitle = endurl + scan_spacechars(&subj->input, endurl);
 
 			// ensure there are spaces btw url and title
 			endtitle = (starttitle == endurl) ? starttitle :
-				starttitle + scan_link_title(subj->buffer, starttitle);
+				starttitle + scan_link_title(&subj->input, starttitle);
 
-			endall = endtitle + scan_spacechars(subj->buffer, endtitle);
+			endall = endtitle + scan_spacechars(&subj->input, endtitle);
 
-			if (gh_buf_at(subj->buffer, endall) == ')') {
+			if (peek_at(subj, endall) == ')') {
 				subj->pos = endall + 1;
 
-				url = chunk_buf(subj->buffer, starturl, endurl - starturl);
-				title = chunk_buf(subj->buffer, starttitle, endtitle - starttitle);
+				url = chunk_dup(&subj->input, starturl, endurl - starturl);
+				title = chunk_dup(&subj->input, starttitle, endtitle - starttitle);
 				lab = parse_chunk_inlines(&rawlabel, NULL);
 
 				return make_link(lab, url, title);
@@ -850,7 +806,7 @@ static inl* handle_left_bracket(subject* subj)
 
 			// Check for reference link.
 			// First, see if there's another label:
-			subj->pos = subj->pos + scan_spacechars(subj->buffer, endlabel);
+			subj->pos = subj->pos + scan_spacechars(&subj->input, endlabel);
 			reflabel = rawlabel;
 
 			// if followed by a nonempty link label, we change reflabel to it:
@@ -892,8 +848,8 @@ static inl* handle_newline(subject *subj)
 		advance(subj);
 	}
 	if (nlpos > 1 &&
-			gh_buf_at(subj->buffer, nlpos - 1) == ' ' &&
-			gh_buf_at(subj->buffer, nlpos - 2) == ' ') {
+			peek_at(subj, nlpos - 1) == ' ' &&
+			peek_at(subj, nlpos - 2) == ' ') {
 		return make_linebreak();
 	} else {
 		return make_softbreak();
@@ -917,30 +873,22 @@ extern inl* parse_inlines_while(subject* subj, int (*f)(subject*))
 
 inl *parse_chunk_inlines(chunk *chunk, reference** refmap)
 {
-	inl *result;
 	subject subj;
-	gh_buf full_chunk = GH_BUF_INIT;
-
-	gh_buf_set(&full_chunk, chunk->data, chunk->len);
-	init_subject(&subj, &full_chunk, 0, refmap);
-	result = parse_inlines_while(&subj, not_eof);
-
-	gh_buf_free(&full_chunk);
-	return result;
+	subject_from_chunk(&subj, chunk, refmap);
+	return parse_inlines_while(&subj, not_eof);
 }
 
-static int find_special_char(subject *subj)
+static int subject_find_special_char(subject *subj)
 {
 	int n = subj->pos + 1;
-	int size = (int)gh_buf_len(subj->buffer);
 
-	while (n < size) {
-		if (strchr("\n\\`&_*[]<!", gh_buf_at(subj->buffer, n)))
+	while (n < subj->input.len) {
+		if (strchr("\n\\`&_*[]<!", subj->input.data[n]))
 			return n;
 		n++;
 	}
 
-	return -1;
+	return subj->input.len;
 }
 
 // Parse an inline, advancing subject, and add it to last element.
@@ -973,11 +921,13 @@ static int parse_inline(subject* subj, inl ** last)
 			new = handle_pointy_brace(subj);
 			break;
 		case '_':
-			if (subj->pos > 0 && (isalnum(gh_buf_at(subj->buffer, subj->pos - 1)) ||
-						gh_buf_at(subj->buffer, subj->pos - 1) == '_')) {
-				new = make_str(chunk_literal("_"));
-				advance(subj);
-				break;
+			if (subj->pos > 0) {
+				unsigned char prev = peek_at(subj, subj->pos - 1);
+				if (isalnum(prev) || prev == '_') {
+					new = make_str(chunk_literal("_"));
+					advance(subj);
+					break;
+				}
 			}
 
 			new = handle_strong_emph(subj, '_');
@@ -1002,18 +952,13 @@ static int parse_inline(subject* subj, inl ** last)
 			}
 			break;
 		default:
-		text_literal:
-			endpos = find_special_char(subj);
-			if (endpos < 0) {
-				endpos = gh_buf_len(subj->buffer);
-			}
-
-			contents = chunk_buf(subj->buffer, subj->pos, endpos - subj->pos);
+			endpos = subject_find_special_char(subj);
+			contents = chunk_dup(&subj->input, subj->pos, endpos - subj->pos);
 			subj->pos = endpos;
 
 			// if we're at a newline, strip trailing spaces.
 			if (peek_char(subj) == '\n') {
-				chunk_trim(&contents);
+				chunk_rtrim(&contents);
 			}
 
 			new = make_str(contents);
@@ -1026,10 +971,10 @@ static int parse_inline(subject* subj, inl ** last)
 	return 1;
 }
 
-extern inl* parse_inlines(gh_buf *input, int input_pos, reference** refmap)
+extern inl* parse_inlines(gh_buf *input, reference** refmap)
 {
 	subject subj;
-	init_subject(&subj, input, input_pos, refmap);
+	subject_from_buf(&subj, input, refmap);
 	return parse_inlines_while(&subj, not_eof);
 }
 
@@ -1048,7 +993,7 @@ void spnl(subject* subj)
 // Modify refmap if a reference is encountered.
 // Return 0 if no reference found, otherwise position of subject
 // after reference is parsed.
-extern int parse_reference(gh_buf *input, int input_pos, reference** refmap)
+extern int parse_reference(gh_buf *input, reference** refmap)
 {
 	subject subj;
 
@@ -1058,9 +1003,9 @@ extern int parse_reference(gh_buf *input, int input_pos, reference** refmap)
 
 	int matchlen = 0;
 	int beforetitle;
-	reference * new = NULL;
+	reference *new = NULL;
 
-	init_subject(&subj, input, input_pos, NULL);
+	subject_from_buf(&subj, input, NULL);
 
 	// parse label:
 	if (!link_label(&subj, &lab))
@@ -1075,9 +1020,9 @@ extern int parse_reference(gh_buf *input, int input_pos, reference** refmap)
 
 	// parse link url:
 	spnl(&subj);
-	matchlen = scan_link_url(subj.buffer, subj.pos);
+	matchlen = scan_link_url(&subj.input, subj.pos);
 	if (matchlen) {
-		url = chunk_buf(subj.buffer, subj.pos, matchlen);
+		url = chunk_dup(&subj.input, subj.pos, matchlen);
 		subj.pos += matchlen;
 	} else {
 		return 0;
@@ -1086,9 +1031,9 @@ extern int parse_reference(gh_buf *input, int input_pos, reference** refmap)
 	// parse optional link_title
 	beforetitle = subj.pos;
 	spnl(&subj);
-	matchlen = scan_link_title(subj.buffer, subj.pos);
+	matchlen = scan_link_title(&subj.input, subj.pos);
 	if (matchlen) {
-		title = chunk_buf(subj.buffer, subj.pos, matchlen);
+		title = chunk_dup(&subj.input, subj.pos, matchlen);
 		subj.pos += matchlen;
 	} else {
 		subj.pos = beforetitle;
diff --git a/src/print.c b/src/print.c
index 0a87925..c262995 100644
--- a/src/print.c
+++ b/src/print.c
@@ -9,7 +9,7 @@ static void print_str(const unsigned char *s, int len)
 	int i;
 
 	if (len < 0)
-		len = strlen(s);
+		len = strlen((char *)s);
 
 	putchar('"');
 	for (i = 0; i < len; ++i) {
diff --git a/src/scanners.h b/src/scanners.h
index b6e586b..f96c42d 100644
--- a/src/scanners.h
+++ b/src/scanners.h
@@ -1,15 +1,15 @@
-#include "buffer.h"
+#include "stmd.h"
 
-int scan_autolink_uri(const gh_buf *s, int pos);
-int scan_autolink_email(const gh_buf *s, int pos);
-int scan_html_tag(const gh_buf *s, int pos);
-int scan_html_block_tag(const gh_buf *s, int pos);
-int scan_link_url(const gh_buf *s, int pos);
-int scan_link_title(const gh_buf *s, int pos);
-int scan_spacechars(const gh_buf *s, int pos);
-int scan_atx_header_start(const gh_buf *s, int pos);
-int scan_setext_header_line(const gh_buf *s, int pos);
-int scan_hrule(const gh_buf *s, int pos);
-int scan_open_code_fence(const gh_buf *s, int pos);
-int scan_close_code_fence(const gh_buf *s, int pos, int len);
-int scan_entity(const gh_buf *s, int pos);
+int scan_autolink_uri(chunk *c, int offset);
+int scan_autolink_email(chunk *c, int offset);
+int scan_html_tag(chunk *c, int offset);
+int scan_html_block_tag(chunk *c, int offset);
+int scan_link_url(chunk *c, int offset);
+int scan_link_title(chunk *c, int offset);
+int scan_spacechars(chunk *c, int offset);
+int scan_atx_header_start(chunk *c, int offset);
+int scan_setext_header_line(chunk *c, int offset);
+int scan_hrule(chunk *c, int offset);
+int scan_open_code_fence(chunk *c, int offset);
+int scan_close_code_fence(chunk *c, int offset, int len);
+int scan_entity(chunk *c, int offset);
diff --git a/src/scanners.re b/src/scanners.re
index 7323ef9..5ac7c15 100644
--- a/src/scanners.re
+++ b/src/scanners.re
@@ -1,8 +1,15 @@
-#include "buffer.h"
+#include "scanners.h"
+
+#define SCAN_DATA \
+  const unsigned char *marker = NULL; \
+  const unsigned char *p = c->data + offset; \
+  const unsigned char *start = p; \
+  const unsigned char *end = c->data + c->len
 
 /*!re2c
   re2c:define:YYCTYPE  = "unsigned char";
   re2c:define:YYCURSOR = p;
+  re2c:define:YYLIMIT = end;
   re2c:define:YYMARKER = marker;
   re2c:define:YYCTXMARKER = marker;
   re2c:yyfill:enable = 0;
@@ -55,11 +62,9 @@
 */
 
 // Try to match URI autolink after first <, returning number of chars matched.
-extern int scan_autolink_uri(const gh_buf *s, int pos)
+extern int scan_autolink_uri(chunk *c, int offset)
 {
-  unsigned char * marker = NULL;
-  unsigned char * p = &(s->ptr[pos]);
-  unsigned char * start = p;
+  SCAN_DATA;
 /*!re2c
   scheme [:]([^\x00-\x20<>\\]|escaped_char)*[>]  { return (p - start); }
   .? { return 0; }
@@ -67,11 +72,9 @@ extern int scan_autolink_uri(const gh_buf *s, int pos)
 }
 
 // Try to match email autolink after first <, returning num of chars matched.
-extern int scan_autolink_email(const gh_buf *s, int pos)
+extern int scan_autolink_email(chunk *c, int offset)
 {
-  unsigned char * marker = NULL;
-  unsigned char * p = &(s->ptr[pos]);
-  unsigned char * start = p;
+  SCAN_DATA;
 /*!re2c
   [a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+
     [@]
@@ -83,11 +86,9 @@ extern int scan_autolink_email(const gh_buf *s, int pos)
 }
 
 // Try to match an HTML tag after first <, returning num of chars matched.
-extern int scan_html_tag(const gh_buf *s, int pos)
+extern int scan_html_tag(chunk *c, int offset)
 {
-  unsigned char * marker = NULL;
-  unsigned char * p = &(s->ptr[pos]);
-  unsigned char * start = p;
+  SCAN_DATA;
 /*!re2c
   htmltag { return (p - start); }
   .? { return 0; }
@@ -96,11 +97,9 @@ extern int scan_html_tag(const gh_buf *s, int pos)
 
 // Try to match an HTML block tag including first <,
 // returning num of chars matched.
-extern int scan_html_block_tag(const gh_buf *s, int pos)
+extern int scan_html_block_tag(chunk *c, int offset)
 {
-  unsigned char * marker = NULL;
-  unsigned char * p = &(s->ptr[pos]);
-  unsigned char * start = p;
+  SCAN_DATA;
 /*!re2c
   [<] [/] blocktagname (spacechar | [>])  { return (p - start); }
   [<] blocktagname (spacechar | [/>]) { return (p - start); }
@@ -113,11 +112,9 @@ extern int scan_html_block_tag(const gh_buf *s, int pos)
 // This may optionally be contained in <..>; otherwise
 // whitespace and unbalanced right parentheses aren't allowed.
 // Newlines aren't ever allowed.
-extern int scan_link_url(const gh_buf *s, int pos)
+extern int scan_link_url(chunk *c, int offset)
 {
-  unsigned char * marker = NULL;
-  unsigned char * p = &(s->ptr[pos]);
-  unsigned char * start = p;
+  SCAN_DATA;
 /*!re2c
   [ \n]* [<] ([^<>\n\\\x00] | escaped_char | [\\])* [>] { return (p - start); }
   [ \n]* (reg_char+ | escaped_char | in_parens_nosp)* { return (p - start); }
@@ -128,11 +125,9 @@ extern int scan_link_url(const gh_buf *s, int pos)
 // Try to match a link title (in single quotes, in double quotes, or
 // in parentheses), returning number of chars matched.  Allow one
 // level of internal nesting (quotes within quotes).
-extern int scan_link_title(const gh_buf *s, int pos)
+extern int scan_link_title(chunk *c, int offset)
 {
-  unsigned char * marker = NULL;
-  unsigned char * p = &(s->ptr[pos]);
-  unsigned char * start = p;
+  SCAN_DATA;
 /*!re2c
   ["] (escaped_char|[^"\x00])* ["]   { return (p - start); }
   ['] (escaped_char|[^'\x00])* ['] { return (p - start); }
@@ -142,10 +137,9 @@ extern int scan_link_title(const gh_buf *s, int pos)
 }
 
 // Match space characters, including newlines.
-extern int scan_spacechars(const gh_buf *s, int pos)
+extern int scan_spacechars(chunk *c, int offset)
 {
-  unsigned char * p = &(s->ptr[pos]);
-  unsigned char * start = p;
+  SCAN_DATA;
 /*!re2c
   [ \t\n]* { return (p - start); }
   . { return 0; }
@@ -153,11 +147,9 @@ extern int scan_spacechars(const gh_buf *s, int pos)
 }
 
 // Match ATX header start.
-extern int scan_atx_header_start(const gh_buf *s, int pos)
+extern int scan_atx_header_start(chunk *c, int offset)
 {
-  unsigned char * marker = NULL;
-  unsigned char * p = &(s->ptr[pos]);
-  unsigned char * start = p;
+  SCAN_DATA;
 /*!re2c
   [#]{1,6} ([ ]+|[\n])  { return (p - start); }
   .? { return 0; }
@@ -166,10 +158,9 @@ extern int scan_atx_header_start(const gh_buf *s, int pos)
 
 // Match sexext header line.  Return 1 for level-1 header,
 // 2 for level-2, 0 for no match.
-extern int scan_setext_header_line(const gh_buf *s, int pos)
+extern int scan_setext_header_line(chunk *c, int offset)
 {
-  unsigned char * marker = NULL;
-  unsigned char * p = &(s->ptr[pos]);
+  SCAN_DATA;
 /*!re2c
   [=]+ [ ]* [\n] { return 1; }
   [-]+ [ ]* [\n] { return 2; }
@@ -180,11 +171,9 @@ extern int scan_setext_header_line(const gh_buf *s, int pos)
 // Scan a horizontal rule line: "...three or more hyphens, asterisks,
 // or underscores on a line by themselves. If you wish, you may use
 // spaces between the hyphens or asterisks."
-extern int scan_hrule(const gh_buf *s, int pos)
+extern int scan_hrule(chunk *c, int offset)
 {
-  unsigned char * marker = NULL;
-  unsigned char * p = &(s->ptr[pos]);
-  unsigned char * start = p;
+  SCAN_DATA;
 /*!re2c
   ([*][ ]*){3,} [ \t]* [\n] { return (p - start); }
   ([_][ ]*){3,} [ \t]* [\n] { return (p - start); }
@@ -194,11 +183,9 @@ extern int scan_hrule(const gh_buf *s, int pos)
 }
 
 // Scan an opening code fence.
-extern int scan_open_code_fence(const gh_buf *s, int pos)
+extern int scan_open_code_fence(chunk *c, int offset)
 {
-  unsigned char * marker = NULL;
-  unsigned char * p = &(s->ptr[pos]);
-  unsigned char * start = p;
+  SCAN_DATA;
 /*!re2c
   [`]{3,} / [^`\n\x00]*[\n] { return (p - start); }
   [~]{3,} / [^~\n\x00]*[\n] { return (p - start); }
@@ -207,11 +194,9 @@ extern int scan_open_code_fence(const gh_buf *s, int pos)
 }
 
 // Scan a closing code fence with length at least len.
-extern int scan_close_code_fence(const gh_buf *s, int pos, int len)
+extern int scan_close_code_fence(chunk *c, int offset, int len)
 {
-  unsigned char * marker = NULL;
-  unsigned char * p = &(s->ptr[pos]);
-  unsigned char * start = p;
+  SCAN_DATA;
 /*!re2c
   ([`]{3,} | [~]{3,}) / spacechar* [\n]
                               { if (p - start > len) {
@@ -225,11 +210,9 @@ extern int scan_close_code_fence(const gh_buf *s, int pos, int len)
 
 // Scans an entity.
 // Returns number of chars matched.
-extern int scan_entity(const gh_buf *s, int pos)
+extern int scan_entity(chunk *c, int offset)
 {
-  unsigned char * marker = NULL;
-  unsigned char * p = &(s->ptr[pos]);
-  unsigned char * start = p;
+  SCAN_DATA;
 /*!re2c
   [&] ([#] ([Xx][A-Fa-f0-9]{1,8}|[0-9]{1,8}) |[A-Za-z][A-Za-z0-9]{1,31} ) [;]
      { return (p - start); }
diff --git a/src/stmd.h b/src/stmd.h
index 3e284bd..4a3c399 100644
--- a/src/stmd.h
+++ b/src/stmd.h
@@ -1,17 +1,15 @@
+#ifndef _STDMD_H_
+#define _STDMD_H_
+
 #include <stdbool.h>
 #include <stdio.h>
 #include "buffer.h"
+#include "chunk.h"
 #include "uthash.h"
 
 #define VERSION "0.1"
 #define CODE_INDENT 4
 
-typedef struct {
-	const unsigned char *data;
-	int len;
-	int alloc;
-} chunk;
-
 typedef struct Inline {
 	enum { INL_STRING, INL_SOFTBREAK, INL_LINEBREAK, INL_CODE, INL_RAW_HTML, INL_ENTITY,
 		INL_EMPH, INL_STRONG, INL_LINK, INL_IMAGE } tag;
@@ -79,7 +77,6 @@ typedef struct Block {
   struct Block*      parent;
   struct Block*      top;
   gh_buf			 string_content;
-  int				 string_pos;
   inl*               inline_content;
   union  {
     struct ListData       list_data;
@@ -91,10 +88,10 @@ typedef struct Block {
   struct Block *     prev;
 } block;
 
-inl* parse_inlines(gh_buf *input, int input_pos, reference** refmap);
+inl* parse_inlines(gh_buf *input, reference** refmap);
 void free_inlines(inl* e);
 
-int parse_reference(gh_buf *input, int input_pos, reference** refmap);
+int parse_reference(gh_buf *input, reference** refmap);
 void free_reference(reference *ref);
 void free_reference_map(reference **refmap);
 
@@ -117,3 +114,4 @@ void inlines_to_html(gh_buf *html, inl *b);
 
 void utf8proc_case_fold(gh_buf *dest, const unsigned char *str, int len);
 
+#endif
-- 
cgit v1.2.3


From f5168c63ad305b3e331eb7d31efaf46b0541bba4 Mon Sep 17 00:00:00 2001
From: Vicent Marti <tanoku@gmail.com>
Date: Thu, 4 Sep 2014 06:41:18 +0200
Subject: 368/73

---
 src/blocks.c  | 1 +
 src/inlines.c | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

(limited to 'src/inlines.c')

diff --git a/src/blocks.c b/src/blocks.c
index 94ff986..bd25d6c 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -57,6 +57,7 @@ bool is_blank(gh_buf *s, int offset)
 				return true;
 			case ' ':
 				offset++;
+				break;
 			default:
 				return false;
 		}
diff --git a/src/inlines.c b/src/inlines.c
index ef27a24..ced4673 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -123,7 +123,7 @@ inline static inl* make_linkable(int t, inl* label, chunk url, chunk title)
 	e->tag = t;
 	e->content.linkable.label = label;
 	e->content.linkable.url   = chunk_to_cstr(&url);
-	e->content.linkable.title = url.len ? chunk_to_cstr(&title) : NULL;
+	e->content.linkable.title = title.len ? chunk_to_cstr(&title) : NULL;
 	e->next = NULL;
 	return e;
 }
-- 
cgit v1.2.3


From 45c1d9fadb3e8aab4a01bb27a4e2ece379902d1a Mon Sep 17 00:00:00 2001
From: Vicent Marti <tanoku@gmail.com>
Date: Thu, 4 Sep 2014 17:26:11 +0200
Subject: 426/15

---
 runtests.pl     |   3 ++
 spec.txt        |   6 ++--
 src/html/html.c |  22 +++++-------
 src/inlines.c   | 105 +++++++++++++++++++++++++++++---------------------------
 4 files changed, 69 insertions(+), 67 deletions(-)

(limited to 'src/inlines.c')

diff --git a/runtests.pl b/runtests.pl
index 2e2b795..e53938d 100644
--- a/runtests.pl
+++ b/runtests.pl
@@ -49,6 +49,7 @@ sub tidy
       s/  */ /;
       # collapse space before /> in tag
       s/  *\/>/\/>/;
+	  s/>\n$/>/;
       # skip blank line
       if (/^$/) {
         next;
@@ -89,8 +90,10 @@ sub dotest
     print $markdown;
     print "=== expected ===============\n";
     print $html;
+	print "\n";
     print "=== got ====================\n";
     print $actual;
+	print "\n";
     print color "black";
     return 0;
   }
diff --git a/spec.txt b/spec.txt
index 82ae0b6..d7e70f5 100644
--- a/spec.txt
+++ b/spec.txt
@@ -1682,7 +1682,7 @@ them.
 
 [Foo bar]
 .
-<p><a href="my url" title="title">Foo bar</a></p>
+<p><a href="my%20url" title="title">Foo bar</a></p>
 .
 
 The title may be omitted:
@@ -1745,7 +1745,7 @@ case-insensitive (see [matches](#matches)).
 
 [αγω]
 .
-<p><a href="/φου">αγω</a></p>
+<p><a href="/%CF%86%CE%BF%CF%85">αγω</a></p>
 .
 
 Here is a link reference definition with no corresponding link.
@@ -3688,7 +3688,7 @@ raw HTML:
 .
 <http://google.com?find=\*>
 .
-<p><a href="http://google.com?find=\*">http://google.com?find=\*</a></p>
+<p><a href="http://google.com?find=%5C*">http://google.com?find=\*</a></p>
 .
 
 .
diff --git a/src/html/html.c b/src/html/html.c
index 2a65a63..cdccf2a 100644
--- a/src/html/html.c
+++ b/src/html/html.c
@@ -50,17 +50,15 @@ void blocks_to_html(gh_buf *html, block *b, bool tight)
 					cr(html);
 					gh_buf_puts(html, "<p>");
 					inlines_to_html(html, b->inline_content);
-					gh_buf_puts(html, "</p>");
-					cr(html);
+					gh_buf_puts(html, "</p>\n");
 				}
 				break;
 
 			case block_quote:
 				cr(html);
-				gh_buf_puts(html, "<blockquote>");
+				gh_buf_puts(html, "<blockquote>\n");
 				blocks_to_html(html, b->children, false);
-				gh_buf_puts(html, "</blockquote>");
-				cr(html);
+				gh_buf_puts(html, "</blockquote>\n");
 				break;
 
 			case list_item:
@@ -68,8 +66,7 @@ void blocks_to_html(gh_buf *html, block *b, bool tight)
 				gh_buf_puts(html, "<li>");
 				blocks_to_html(html, b->children, tight);
 				gh_buf_trim(html); /* TODO: rtrim */
-				gh_buf_puts(html, "</li>");
-				cr(html);
+				gh_buf_puts(html, "</li>\n");
 				break;
 
 			case list:
@@ -87,7 +84,7 @@ void blocks_to_html(gh_buf *html, block *b, bool tight)
 
 				blocks_to_html(html, b->children, data->tight);
 				gh_buf_puts(html, data->list_type == bullet ? "</ul>" : "</ol>");
-				cr(html);
+				gh_buf_putc(html, '\n');
 				break;
 
 			case atx_header:
@@ -95,8 +92,7 @@ void blocks_to_html(gh_buf *html, block *b, bool tight)
 				cr(html);
 				gh_buf_printf(html, "<h%d>", b->attributes.header_level);
 				inlines_to_html(html, b->inline_content);
-				gh_buf_printf(html, "</h%d>", b->attributes.header_level);
-				cr(html);
+				gh_buf_printf(html, "</h%d>\n", b->attributes.header_level);
 				break;
 
 			case indented_code:
@@ -122,8 +118,7 @@ void blocks_to_html(gh_buf *html, block *b, bool tight)
 
 				gh_buf_puts(html, "><code>");
 				escape_html(html, b->string_content.ptr, b->string_content.size);
-				gh_buf_puts(html, "</code></pre>");
-				cr(html);
+				gh_buf_puts(html, "</code></pre>\n");
 				break;
 
 			case html_block:
@@ -131,8 +126,7 @@ void blocks_to_html(gh_buf *html, block *b, bool tight)
 				break;
 
 			case hrule:
-				gh_buf_puts(html, "<hr />");
-				cr(html);
+				gh_buf_puts(html, "<hr />\n");
 				break;
 
 			case reference_def:
diff --git a/src/inlines.c b/src/inlines.c
index ced4673..a0dcac9 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -1,8 +1,8 @@
 #include <stdlib.h>
+#include <string.h>
 #include <stdio.h>
 #include <stdbool.h>
 #include <ctype.h>
-#include <string.h>
 
 #include "stmd.h"
 #include "uthash.h"
@@ -18,7 +18,7 @@ typedef struct Subject {
 reference* lookup_reference(reference** refmap, chunk *label);
 reference* make_reference(chunk *label, chunk *url, chunk *title);
 
-static unsigned char *clean_url(chunk *url);
+static unsigned char *clean_url(chunk *url, int is_email);
 static unsigned char *clean_title(chunk *title);
 
 inline static unsigned char *chunk_to_cstr(chunk *c);
@@ -97,7 +97,7 @@ extern reference* make_reference(chunk *label, chunk *url, chunk *title)
 	reference *ref;
 	ref = malloc(sizeof(reference));
 	ref->label = normalize_reference(label);
-	ref->url = clean_url(url);
+	ref->url = clean_url(url, 0);
 	ref->title = clean_title(title);
 	return ref;
 }
@@ -116,14 +116,25 @@ extern void add_reference(reference** refmap, reference* ref)
 	}
 }
 
+inline static inl* make_link_from_reference(inl* label, reference *ref)
+{
+	inl* e = (inl*) malloc(sizeof(inl));
+	e->tag = INL_LINK;
+	e->content.linkable.label = label;
+	e->content.linkable.url   = strdup(ref->url);
+	e->content.linkable.title = ref->title ? strdup(ref->title) : NULL;
+	e->next = NULL;
+	return e;
+}
+
 // Create an inline with a linkable string value.
-inline static inl* make_linkable(int t, inl* label, chunk url, chunk title)
+inline static inl* make_link(inl* label, chunk url, chunk title, int is_email)
 {
 	inl* e = (inl*) malloc(sizeof(inl));
-	e->tag = t;
+	e->tag = INL_LINK;
 	e->content.linkable.label = label;
-	e->content.linkable.url   = chunk_to_cstr(&url);
-	e->content.linkable.title = title.len ? chunk_to_cstr(&title) : NULL;
+	e->content.linkable.url   = clean_url(&url, is_email);
+	e->content.linkable.title = clean_title(&title);
 	e->next = NULL;
 	return e;
 }
@@ -163,7 +174,6 @@ inline static inl* make_simple(int t)
 #define make_entity(s) make_literal(INL_ENTITY, s)
 #define make_linebreak() make_simple(INL_LINEBREAK)
 #define make_softbreak() make_simple(INL_SOFTBREAK)
-#define make_link(label, url, title) make_linkable(INL_LINK, label, url, title)
 #define make_emph(contents) make_inlines(INL_EMPH, contents)
 #define make_strong(contents) make_inlines(INL_STRONG, contents)
 
@@ -309,37 +319,27 @@ static int scan_to_closing_backticks(subject* subj, int openticklength)
 // space and newline characters into a single space.
 static void normalize_whitespace(gh_buf *s)
 {
-	/* TODO */
-#if 0
 	bool last_char_was_space = false;
-	int pos = 0;
-	char c;
-	while ((c = gh_buf_at(s, pos))) {
-		switch (c) {
-			case ' ':
-				if (last_char_was_space) {
-					bdelete(s, pos, 1);
-				} else {
-					pos++;
-				}
-				last_char_was_space = true;
-				break;
-			case '\n':
-				if (last_char_was_space) {
-					bdelete(s, pos, 1);
-				} else {
-					bdelete(s, pos, 1);
-					binsertch(s, pos, 1, ' ');
-					pos++;
-				}
-				last_char_was_space = true;
+	int r, w;
+
+	for (r = 0, w = 0; r < s->size; ++r) {
+		switch (s->ptr[r]) {
+		case ' ':
+		case '\n':
+			if (last_char_was_space)
 				break;
-			default:
-				pos++;
-				last_char_was_space = false;
+
+			s->ptr[w++] = ' ';
+			last_char_was_space = true;
+			break;
+
+		default:
+			s->ptr[w++] = s->ptr[r];
+			last_char_was_space = false;
 		}
 	}
-#endif
+
+	gh_buf_truncate(s, w);
 }
 
 // Parse backtick code section or raw backticks, return an inline.
@@ -593,16 +593,19 @@ extern void unescape_buffer(gh_buf *buf)
 
 // Clean a URL: remove surrounding whitespace and surrounding <>,
 // and remove \ that escape punctuation.
-static unsigned char *clean_url(chunk *url)
+static unsigned char *clean_url(chunk *url, int is_email)
 {
 	gh_buf buf = GH_BUF_INIT;
 
 	chunk_trim(url);
 
+	if (is_email)
+		gh_buf_puts(&buf, "mailto:");
+
 	if (url->data[0] == '<' && url->data[url->len - 1] == '>') {
-		gh_buf_set(&buf, url->data + 1, url->len - 2);
+		gh_buf_put(&buf, url->data + 1, url->len - 2);
 	} else {
-		gh_buf_set(&buf, url->data, url->len);
+		gh_buf_put(&buf, url->data, url->len);
 	}
 
 	unescape_buffer(&buf);
@@ -613,8 +616,13 @@ static unsigned char *clean_url(chunk *url)
 static unsigned char *clean_title(chunk *title)
 {
 	gh_buf buf = GH_BUF_INIT;
-	unsigned char first = title->data[0];
-	unsigned char last = title->data[title->len - 1];
+	unsigned char first, last;
+
+	if (title->len == 0)
+		return NULL;
+
+	first = title->data[0];
+	last = title->data[title->len - 1];
 
 	// remove surrounding quotes if any:
 	if ((first == '\'' && last == '\'') ||
@@ -647,25 +655,22 @@ static inl* handle_pointy_brace(subject* subj)
 		return make_link(
 			make_str_with_entities(&contents),
 			contents,
-			chunk_literal("")
+			chunk_literal(""),
+			0
 		);
 	}
 
 	// next try to match an email autolink
 	matchlen = scan_autolink_email(&subj->input, subj->pos);
 	if (matchlen > 0) {
-		gh_buf mail_url = GH_BUF_INIT;
-
 		contents = chunk_dup(&subj->input, subj->pos, matchlen - 1);
 		subj->pos += matchlen;
 
-		gh_buf_puts(&mail_url, "mailto:");
-		gh_buf_put(&mail_url, contents.data, contents.len);
-
 		return make_link(
 				make_str_with_entities(&contents),
-				chunk_buf_detach(&mail_url),
-				chunk_literal("")
+				contents,
+				chunk_literal(""),
+				1
 		);
 	}
 
@@ -790,7 +795,7 @@ static inl* handle_left_bracket(subject* subj)
 				title = chunk_dup(&subj->input, starttitle, endtitle - starttitle);
 				lab = parse_chunk_inlines(&rawlabel, NULL);
 
-				return make_link(lab, url, title);
+				return make_link(lab, url, title, 0);
 			} else {
 				// if we get here, we matched a label but didn't get further:
 				subj->pos = endlabel;
@@ -821,7 +826,7 @@ static inl* handle_left_bracket(subject* subj)
 			ref = lookup_reference(subj->reference_map, &reflabel);
 			if (ref != NULL) { // found
 				lab = parse_chunk_inlines(&rawlabel, NULL);
-				result = make_link(lab, chunk_literal(ref->url), chunk_literal(ref->title));
+				result = make_link_from_reference(lab, ref);
 			} else {
 				subj->pos = endlabel;
 				lab = parse_chunk_inlines(&rawlabel, subj->reference_map);
-- 
cgit v1.2.3


From 9830d3a05a374a0d05676301bd4065917b59ad53 Mon Sep 17 00:00:00 2001
From: Vicent Marti <tanoku@gmail.com>
Date: Thu, 4 Sep 2014 17:42:12 +0200
Subject: 430/11

---
 src/html/houdini_html_e.c |  4 ++--
 src/html/html.c           |  5 ++---
 src/inlines.c             | 14 +++-----------
 3 files changed, 7 insertions(+), 16 deletions(-)

(limited to 'src/inlines.c')

diff --git a/src/html/houdini_html_e.c b/src/html/houdini_html_e.c
index 5cdd3dd..95b6c41 100644
--- a/src/html/houdini_html_e.c
+++ b/src/html/houdini_html_e.c
@@ -62,8 +62,8 @@ houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure)
 			break;
 
 		/* The forward slash is only escaped in secure mode */
-		if (src[i] == '/' && !secure) {
-			gh_buf_putc(ob, '/');
+		if ((src[i] == '/' || src[i] == '\'') && !secure) {
+			gh_buf_putc(ob, src[i]);
 		} else {
 			gh_buf_puts(ob, HTML_ESCAPES[esc]);
 		}
diff --git a/src/html/html.c b/src/html/html.c
index cdccf2a..913a602 100644
--- a/src/html/html.c
+++ b/src/html/html.c
@@ -191,10 +191,9 @@ void inlines_to_html(gh_buf *html, inl* ils)
 				escape_href(html, ils->content.linkable.url, -1);
 
 				inlines_to_html(&scrap, ils->content.inlines);
-				if (scrap.size) {
-					gh_buf_puts(html, "\" alt=\"");
+				gh_buf_puts(html, "\" alt=\"");
+				if (scrap.size)
 					escape_html(html, scrap.ptr, scrap.size);
-				}
 				gh_buf_clear(&scrap);
 
 				if (ils->content.linkable.title) {
diff --git a/src/inlines.c b/src/inlines.c
index a0dcac9..599be84 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -21,7 +21,6 @@ reference* make_reference(chunk *label, chunk *url, chunk *title);
 static unsigned char *clean_url(chunk *url, int is_email);
 static unsigned char *clean_title(chunk *title);
 
-inline static unsigned char *chunk_to_cstr(chunk *c);
 inline static void chunk_free(chunk *c);
 inline static void chunk_trim(chunk *c);
 
@@ -37,6 +36,8 @@ static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap);
 static void subject_from_buf(subject *e, gh_buf *buffer, reference** refmap);
 static int subject_find_special_char(subject *subj);
 
+static void normalize_whitespace(gh_buf *s);
+
 extern void free_reference(reference *ref) {
 	free(ref->label);
 	free(ref->url);
@@ -62,19 +63,10 @@ extern void free_reference_map(reference **refmap) {
 static unsigned char *normalize_reference(chunk *ref)
 {
 	gh_buf normalized = GH_BUF_INIT;
-	int r, w;
 
 	utf8proc_case_fold(&normalized, ref->data, ref->len);
 	gh_buf_trim(&normalized);
-
-	for (r = 0, w = 0; r < normalized.size; ++r) {
-		if (r && gh_buf_at(&normalized, r - 1) == ' ') {
-			while (gh_buf_at(&normalized, r) == ' ')
-				r++;
-		}
-
-		normalized.ptr[w++] = normalized.ptr[r];
-	}
+	normalize_whitespace(&normalized);
 
 	return gh_buf_detach(&normalized);
 }
-- 
cgit v1.2.3


From d8f44f1e4f0bd944ab43e6434a1579d670ed66cf Mon Sep 17 00:00:00 2001
From: Vicent Marti <tanoku@gmail.com>
Date: Thu, 4 Sep 2014 17:49:13 +0200
Subject: 433/8

---
 spec.txt        | 2 +-
 src/html/html.c | 6 ++++--
 src/inlines.c   | 3 +++
 src/print.c     | 5 ++++-
 4 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'src/inlines.c')

diff --git a/spec.txt b/spec.txt
index d7e70f5..cfda2a3 100644
--- a/spec.txt
+++ b/spec.txt
@@ -3946,7 +3946,7 @@ But this is a link:
 .
 <http://foo.bar.`baz>`
 .
-<p><a href="http://foo.bar.`baz">http://foo.bar.`baz</a>`</p>
+<p><a href="http://foo.bar.%60baz">http://foo.bar.`baz</a>`</p>
 .
 
 And this is an HTML tag:
diff --git a/src/html/html.c b/src/html/html.c
index 913a602..41b8fda 100644
--- a/src/html/html.c
+++ b/src/html/html.c
@@ -174,7 +174,8 @@ void inlines_to_html(gh_buf *html, inl* ils)
 
 			case INL_LINK:
 				gh_buf_puts(html, "<a href=\"");
-				escape_href(html, ils->content.linkable.url, -1);
+				if (ils->content.linkable.url)
+					escape_href(html, ils->content.linkable.url, -1);
 
 				if (ils->content.linkable.title) {
 					gh_buf_puts(html, "\" title=\"");
@@ -188,7 +189,8 @@ void inlines_to_html(gh_buf *html, inl* ils)
 
 			case INL_IMAGE:
 				gh_buf_puts(html, "<img src=\"");
-				escape_href(html, ils->content.linkable.url, -1);
+				if (ils->content.linkable.url)
+					escape_href(html, ils->content.linkable.url, -1);
 
 				inlines_to_html(&scrap, ils->content.inlines);
 				gh_buf_puts(html, "\" alt=\"");
diff --git a/src/inlines.c b/src/inlines.c
index 599be84..8e2e683 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -591,6 +591,9 @@ static unsigned char *clean_url(chunk *url, int is_email)
 
 	chunk_trim(url);
 
+	if (url->len == 0)
+		return NULL;
+
 	if (is_email)
 		gh_buf_puts(&buf, "mailto:");
 
diff --git a/src/print.c b/src/print.c
index c262995..832ad4f 100644
--- a/src/print.c
+++ b/src/print.c
@@ -153,7 +153,10 @@ extern void print_inlines(inl* ils, int indent)
 		case INL_LINK:
 		case INL_IMAGE:
 			printf("%s url=", ils->tag == INL_LINK ? "link" : "image");
-			print_str(ils->content.linkable.url, -1);
+
+			if (ils->content.linkable.url)
+				print_str(ils->content.linkable.url, -1);
+
 			if (ils->content.linkable.title) {
 				printf(" title=");
 				print_str(ils->content.linkable.title, -1);
-- 
cgit v1.2.3


From 543c2c94d71adee42c7bd2f8027d75c87ed8120d Mon Sep 17 00:00:00 2001
From: Vicent Marti <tanoku@gmail.com>
Date: Thu, 4 Sep 2014 18:38:14 +0200
Subject: Rename to strbuf

---
 src/blocks.c              | 64 +++++++++++++++----------------
 src/buffer.c              | 86 ++++++++++++++++++++---------------------
 src/buffer.h              | 80 +++++++++++++++++++-------------------
 src/chunk.h               |  4 +-
 src/html/houdini.h        | 22 +++++------
 src/html/houdini_href_e.c | 12 +++---
 src/html/houdini_html_e.c | 10 ++---
 src/html/html.c           | 98 +++++++++++++++++++++++------------------------
 src/inlines.c             | 50 ++++++++++++------------
 src/main.c                |  4 +-
 src/stmd.h                | 16 ++++----
 src/utf8.c                |  6 +--
 12 files changed, 226 insertions(+), 226 deletions(-)

(limited to 'src/inlines.c')

diff --git a/src/blocks.c b/src/blocks.c
index cf0e9e4..9faccd9 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -10,7 +10,7 @@
 
 #define peek_at(i, n) (i)->data[n]
 
-static void incorporate_line(gh_buf *ln, int line_number, block** curptr);
+static void incorporate_line(strbuf *ln, int line_number, block** curptr);
 static void finalize(block* b, int line_number);
 
 static block* make_block(int tag, int start_line, int start_column)
@@ -28,7 +28,7 @@ static block* make_block(int tag, int start_line, int start_column)
 	e->parent = NULL;
 	e->top = NULL;
 	e->attributes.refmap = NULL;
-	gh_buf_init(&e->string_content, 32);
+	strbuf_init(&e->string_content, 32);
 	e->inline_content = NULL;
 	e->next = NULL;
 	e->prev = NULL;
@@ -49,7 +49,7 @@ extern block* make_document()
 }
 
 // Returns true if line has only space characters, else false.
-bool is_blank(gh_buf *s, int offset)
+bool is_blank(strbuf *s, int offset)
 {
 	while (offset < s->size) {
 		switch (s->ptr[offset]) {
@@ -85,10 +85,10 @@ static inline bool accepts_lines(int block_type)
 static void add_line(block* block, chunk *ch, int offset)
 {
 	assert(block->open);
-	gh_buf_put(&block->string_content, ch->data + offset, ch->len - offset);
+	strbuf_put(&block->string_content, ch->data + offset, ch->len - offset);
 }
 
-static void remove_trailing_blank_lines(gh_buf *ln)
+static void remove_trailing_blank_lines(strbuf *ln)
 {
 	int i;
 
@@ -100,13 +100,13 @@ static void remove_trailing_blank_lines(gh_buf *ln)
 	}
 
 	if (i < 0) {
-		gh_buf_clear(ln);
+		strbuf_clear(ln);
 		return;
 	}
 
-	i = gh_buf_strchr(ln, '\n', i);
+	i = strbuf_strchr(ln, '\n', i);
 	if (i >= 0)
-		gh_buf_truncate(ln, i);
+		strbuf_truncate(ln, i);
 }
 
 // Check to see if a block ends with a blank line, descending
@@ -164,10 +164,10 @@ static void finalize(block* b, int line_number)
 	switch (b->tag) {
 		case paragraph:
 			pos = 0;
-			while (gh_buf_at(&b->string_content, 0) == '[' &&
+			while (strbuf_at(&b->string_content, 0) == '[' &&
 					(pos = parse_reference(&b->string_content, b->top->attributes.refmap))) {
 
-				gh_buf_drop(&b->string_content, pos);
+				strbuf_drop(&b->string_content, pos);
 			}
 			if (is_blank(&b->string_content, 0)) {
 				b->tag = reference_def;
@@ -176,23 +176,23 @@ static void finalize(block* b, int line_number)
 
 		case indented_code:
 			remove_trailing_blank_lines(&b->string_content);
-			gh_buf_putc(&b->string_content, '\n');
+			strbuf_putc(&b->string_content, '\n');
 			break;
 
 		case fenced_code:
 			// first line of contents becomes info
-			firstlinelen = gh_buf_strchr(&b->string_content, '\n', 0);
+			firstlinelen = strbuf_strchr(&b->string_content, '\n', 0);
 
-			gh_buf_init(&b->attributes.fenced_code_data.info, 0);
-			gh_buf_set(
+			strbuf_init(&b->attributes.fenced_code_data.info, 0);
+			strbuf_set(
 				&b->attributes.fenced_code_data.info,
 				b->string_content.ptr,
 				firstlinelen
 			);
 
-			gh_buf_drop(&b->string_content, firstlinelen + 1);
+			strbuf_drop(&b->string_content, firstlinelen + 1);
 
-			gh_buf_trim(&b->attributes.fenced_code_data.info);
+			strbuf_trim(&b->attributes.fenced_code_data.info);
 			unescape_buffer(&b->attributes.fenced_code_data.info);
 			break;
 
@@ -265,9 +265,9 @@ extern void free_blocks(block* e)
 	while (e != NULL) {
 		next = e->next;
 		free_inlines(e->inline_content);
-		gh_buf_free(&e->string_content);
+		strbuf_free(&e->string_content);
 		if (e->tag == fenced_code) {
-			gh_buf_free(&e->attributes.fenced_code_data.info);
+			strbuf_free(&e->attributes.fenced_code_data.info);
 		} else if (e->tag == document) {
 			free_reference_map(e->attributes.refmap);
 		}
@@ -287,7 +287,7 @@ void process_inlines(block* cur, reference** refmap)
 		case setext_header:
 			cur->inline_content = parse_inlines(&cur->string_content, refmap);
 			// MEM
-			// gh_buf_free(&cur->string_content);
+			// strbuf_free(&cur->string_content);
 			break;
 
 		default:
@@ -369,7 +369,7 @@ static int lists_match(struct ListData list_data,
 			list_data.bullet_char == item_data.bullet_char);
 }
 
-static void expand_tabs(gh_buf *ob, const unsigned char *line, size_t size)
+static void expand_tabs(strbuf *ob, const unsigned char *line, size_t size)
 {
 	size_t  i = 0, tab = 0;
 
@@ -381,13 +381,13 @@ static void expand_tabs(gh_buf *ob, const unsigned char *line, size_t size)
 		}
 
 		if (i > org)
-			gh_buf_put(ob, line + org, i - org);
+			strbuf_put(ob, line + org, i - org);
 
 		if (i >= size)
 			break;
 
 		do {
-			gh_buf_putc(ob, ' '); tab++;
+			strbuf_putc(ob, ' '); tab++;
 		} while (tab % 4);
 
 		i++;
@@ -409,7 +409,7 @@ static block *finalize_document(block *document, int linenum)
 
 extern block *stmd_parse_file(FILE *f)
 {
-	gh_buf line = GH_BUF_INIT;
+	strbuf line = GH_BUF_INIT;
 	unsigned char buffer[4096];
 	int linenum = 1;
 	block *document = make_document();
@@ -417,17 +417,17 @@ extern block *stmd_parse_file(FILE *f)
 	while (fgets((char *)buffer, sizeof(buffer), f)) {
 		expand_tabs(&line, buffer, strlen((char *)buffer));
 		incorporate_line(&line, linenum, &document);
-		gh_buf_clear(&line);
+		strbuf_clear(&line);
 		linenum++;
 	}
 
-	gh_buf_free(&line);
+	strbuf_free(&line);
 	return finalize_document(document, linenum);
 }
 
 extern block *stmd_parse_document(const unsigned char *buffer, size_t len)
 {
-	gh_buf line = GH_BUF_INIT;
+	strbuf line = GH_BUF_INIT;
 	int linenum = 1;
 	const unsigned char *end = buffer + len;
 	block *document = make_document();
@@ -444,11 +444,11 @@ extern block *stmd_parse_document(const unsigned char *buffer, size_t len)
 		}
 
 		incorporate_line(&line, linenum, &document);
-		gh_buf_clear(&line);
+		strbuf_clear(&line);
 		linenum++;
 	}
 
-	gh_buf_free(&line);
+	strbuf_free(&line);
 	return finalize_document(document, linenum);
 }
 
@@ -471,7 +471,7 @@ static void chop_trailing_hashtags(chunk *ch)
 }
 
 // Process one line at a time, modifying a block.
-static void incorporate_line(gh_buf *line, int line_number, block** curptr)
+static void incorporate_line(strbuf *line, int line_number, block** curptr)
 {
 	block* last_matched_container;
 	int offset = 0;
@@ -639,8 +639,8 @@ static void incorporate_line(gh_buf *line, int line_number, block** curptr)
 		} else if (container->tag == paragraph &&
 				(lev = scan_setext_header_line(&input, first_nonspace)) &&
 				// check that there is only one line in the paragraph:
-				gh_buf_strrchr(&container->string_content, '\n',
-					gh_buf_len(&container->string_content) - 2) < 0) {
+				strbuf_strrchr(&container->string_content, '\n',
+					strbuf_len(&container->string_content) - 2) < 0) {
 
 			container->tag = setext_header;
 			container->attributes.header_level = lev;
@@ -734,7 +734,7 @@ static void incorporate_line(gh_buf *line, int line_number, block** curptr)
 			container == last_matched_container &&
 			!blank &&
 			cur->tag == paragraph &&
-			gh_buf_len(&cur->string_content) > 0) {
+			strbuf_len(&cur->string_content) > 0) {
 
 		add_line(cur, &input, offset);
 
diff --git a/src/buffer.c b/src/buffer.c
index dc4a405..90c2186 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -9,32 +9,32 @@
 
 #include "buffer.h"
 
-/* Used as default value for gh_buf->ptr so that people can always
- * assume ptr is non-NULL and zero terminated even for new gh_bufs.
+/* Used as default value for strbuf->ptr so that people can always
+ * assume ptr is non-NULL and zero terminated even for new strbufs.
  */
-unsigned char gh_buf__initbuf[1];
-unsigned char gh_buf__oom[1];
+unsigned char strbuf__initbuf[1];
+unsigned char strbuf__oom[1];
 
 #define ENSURE_SIZE(b, d) \
-	if ((d) > buf->asize && gh_buf_grow(b, (d)) < 0)\
+	if ((d) > buf->asize && strbuf_grow(b, (d)) < 0)\
 		return -1;
 
-void gh_buf_init(gh_buf *buf, int initial_size)
+void strbuf_init(strbuf *buf, int initial_size)
 {
 	buf->asize = 0;
 	buf->size = 0;
-	buf->ptr = gh_buf__initbuf;
+	buf->ptr = strbuf__initbuf;
 
 	if (initial_size)
-		gh_buf_grow(buf, initial_size);
+		strbuf_grow(buf, initial_size);
 }
 
-int gh_buf_try_grow(gh_buf *buf, int target_size, bool mark_oom)
+int strbuf_try_grow(strbuf *buf, int target_size, bool mark_oom)
 {
 	unsigned char *new_ptr;
 	int new_size;
 
-	if (buf->ptr == gh_buf__oom)
+	if (buf->ptr == strbuf__oom)
 		return -1;
 
 	if (target_size <= buf->asize)
@@ -60,7 +60,7 @@ int gh_buf_try_grow(gh_buf *buf, int target_size, bool mark_oom)
 
 	if (!new_ptr) {
 		if (mark_oom)
-			buf->ptr = gh_buf__oom;
+			buf->ptr = strbuf__oom;
 		return -1;
 	}
 
@@ -75,17 +75,17 @@ int gh_buf_try_grow(gh_buf *buf, int target_size, bool mark_oom)
 	return 0;
 }
 
-void gh_buf_free(gh_buf *buf)
+void strbuf_free(strbuf *buf)
 {
 	if (!buf) return;
 
-	if (buf->ptr != gh_buf__initbuf && buf->ptr != gh_buf__oom)
+	if (buf->ptr != strbuf__initbuf && buf->ptr != strbuf__oom)
 		free(buf->ptr);
 
-	gh_buf_init(buf, 0);
+	strbuf_init(buf, 0);
 }
 
-void gh_buf_clear(gh_buf *buf)
+void strbuf_clear(strbuf *buf)
 {
 	buf->size = 0;
 
@@ -93,10 +93,10 @@ void gh_buf_clear(gh_buf *buf)
 		buf->ptr[0] = '\0';
 }
 
-int gh_buf_set(gh_buf *buf, const unsigned char *data, int len)
+int strbuf_set(strbuf *buf, const unsigned char *data, int len)
 {
 	if (len <= 0 || data == NULL) {
-		gh_buf_clear(buf);
+		strbuf_clear(buf);
 	} else {
 		if (data != buf->ptr) {
 			ENSURE_SIZE(buf, len + 1);
@@ -108,14 +108,14 @@ int gh_buf_set(gh_buf *buf, const unsigned char *data, int len)
 	return 0;
 }
 
-int gh_buf_sets(gh_buf *buf, const char *string)
+int strbuf_sets(strbuf *buf, const char *string)
 {
-	return gh_buf_set(buf,
+	return strbuf_set(buf,
 		(const unsigned char *)string,
 		string ? strlen(string) : 0);
 }
 
-int gh_buf_putc(gh_buf *buf, int c)
+int strbuf_putc(strbuf *buf, int c)
 {
 	ENSURE_SIZE(buf, buf->size + 2);
 	buf->ptr[buf->size++] = c;
@@ -123,7 +123,7 @@ int gh_buf_putc(gh_buf *buf, int c)
 	return 0;
 }
 
-int gh_buf_put(gh_buf *buf, const unsigned char *data, int len)
+int strbuf_put(strbuf *buf, const unsigned char *data, int len)
 {
 	if (len <= 0)
 		return 0;
@@ -135,12 +135,12 @@ int gh_buf_put(gh_buf *buf, const unsigned char *data, int len)
 	return 0;
 }
 
-int gh_buf_puts(gh_buf *buf, const char *string)
+int strbuf_puts(strbuf *buf, const char *string)
 {
-	return gh_buf_put(buf, (const unsigned char *)string, strlen(string));
+	return strbuf_put(buf, (const unsigned char *)string, strlen(string));
 }
 
-int gh_buf_vprintf(gh_buf *buf, const char *format, va_list ap)
+int strbuf_vprintf(strbuf *buf, const char *format, va_list ap)
 {
 	const int expected_size = buf->size + (strlen(format) * 2);
 	int len;
@@ -159,7 +159,7 @@ int gh_buf_vprintf(gh_buf *buf, const char *format, va_list ap)
 
 		if (len < 0) {
 			free(buf->ptr);
-			buf->ptr = gh_buf__oom;
+			buf->ptr = strbuf__oom;
 			return -1;
 		}
 
@@ -174,19 +174,19 @@ int gh_buf_vprintf(gh_buf *buf, const char *format, va_list ap)
 	return 0;
 }
 
-int gh_buf_printf(gh_buf *buf, const char *format, ...)
+int strbuf_printf(strbuf *buf, const char *format, ...)
 {
 	int r;
 	va_list ap;
 
 	va_start(ap, format);
-	r = gh_buf_vprintf(buf, format, ap);
+	r = strbuf_vprintf(buf, format, ap);
 	va_end(ap);
 
 	return r;
 }
 
-void gh_buf_copy_cstr(char *data, int datasize, const gh_buf *buf)
+void strbuf_copy_cstr(char *data, int datasize, const strbuf *buf)
 {
 	int copylen;
 
@@ -204,28 +204,28 @@ void gh_buf_copy_cstr(char *data, int datasize, const gh_buf *buf)
 	data[copylen] = '\0';
 }
 
-void gh_buf_swap(gh_buf *buf_a, gh_buf *buf_b)
+void strbuf_swap(strbuf *buf_a, strbuf *buf_b)
 {
-	gh_buf t = *buf_a;
+	strbuf t = *buf_a;
 	*buf_a = *buf_b;
 	*buf_b = t;
 }
 
-unsigned char *gh_buf_detach(gh_buf *buf)
+unsigned char *strbuf_detach(strbuf *buf)
 {
 	unsigned char *data = buf->ptr;
 
-	if (buf->asize == 0 || buf->ptr == gh_buf__oom)
+	if (buf->asize == 0 || buf->ptr == strbuf__oom)
 		return NULL;
 
-	gh_buf_init(buf, 0);
+	strbuf_init(buf, 0);
 
 	return data;
 }
 
-void gh_buf_attach(gh_buf *buf, unsigned char *ptr, int asize)
+void strbuf_attach(strbuf *buf, unsigned char *ptr, int asize)
 {
-	gh_buf_free(buf);
+	strbuf_free(buf);
 
 	if (ptr) {
 		buf->ptr = ptr;
@@ -235,18 +235,18 @@ void gh_buf_attach(gh_buf *buf, unsigned char *ptr, int asize)
 		else /* pass 0 to fall back on strlen + 1 */
 			buf->asize = buf->size + 1;
 	} else {
-		gh_buf_grow(buf, asize);
+		strbuf_grow(buf, asize);
 	}
 }
 
-int gh_buf_cmp(const gh_buf *a, const gh_buf *b)
+int strbuf_cmp(const strbuf *a, const strbuf *b)
 {
 	int result = memcmp(a->ptr, b->ptr, MIN(a->size, b->size));
 	return (result != 0) ? result :
 		(a->size < b->size) ? -1 : (a->size > b->size) ? 1 : 0;
 }
 
-int gh_buf_strchr(const gh_buf *buf, int c, int pos)
+int strbuf_strchr(const strbuf *buf, int c, int pos)
 {
 	const unsigned char *p = memchr(buf->ptr + pos, c, buf->size - pos);
 	if (!p)
@@ -255,7 +255,7 @@ int gh_buf_strchr(const gh_buf *buf, int c, int pos)
 	return (int)(p - (const unsigned char *)buf->ptr);
 }
 
-int gh_buf_strrchr(const gh_buf *buf, int c, int pos)
+int strbuf_strrchr(const strbuf *buf, int c, int pos)
 {
 	int i;
 
@@ -267,7 +267,7 @@ int gh_buf_strrchr(const gh_buf *buf, int c, int pos)
 	return -1;
 }
 
-void gh_buf_truncate(gh_buf *buf, int len)
+void strbuf_truncate(strbuf *buf, int len)
 {
 	if (len < buf->size) {
 		buf->size = len;
@@ -275,7 +275,7 @@ void gh_buf_truncate(gh_buf *buf, int len)
 	}
 }
 
-void gh_buf_drop(gh_buf *buf, int n)
+void strbuf_drop(strbuf *buf, int n)
 {
 	if (n > 0) {
 		buf->size = buf->size - n;
@@ -286,7 +286,7 @@ void gh_buf_drop(gh_buf *buf, int n)
 	}
 }
 
-void gh_buf_trim(gh_buf *buf)
+void strbuf_trim(strbuf *buf)
 {
 	int i = 0;
 
@@ -296,7 +296,7 @@ void gh_buf_trim(gh_buf *buf)
 	while (i < buf->size && isspace(buf->ptr[i]))
 		i++;
 
-	gh_buf_drop(buf, i);
+	strbuf_drop(buf, i);
 
 	/* rtrim */
 	while (buf->size > 0) {
diff --git a/src/buffer.h b/src/buffer.h
index 0d5143e..6f45cbb 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -9,20 +9,20 @@
 typedef struct {
 	unsigned char *ptr;
 	int asize, size;
-} gh_buf;
+} strbuf;
 
-extern unsigned char gh_buf__initbuf[];
-extern unsigned char gh_buf__oom[];
+extern unsigned char strbuf__initbuf[];
+extern unsigned char strbuf__oom[];
 
-#define GH_BUF_INIT { gh_buf__initbuf, 0, 0 }
+#define GH_BUF_INIT { strbuf__initbuf, 0, 0 }
 
 /**
- * Initialize a gh_buf structure.
+ * Initialize a strbuf structure.
  *
  * For the cases where GH_BUF_INIT cannot be used to do static
  * initialization.
  */
-extern void gh_buf_init(gh_buf *buf, int initial_size);
+extern void strbuf_init(strbuf *buf, int initial_size);
 
 /**
  * Attempt to grow the buffer to hold at least `target_size` bytes.
@@ -32,7 +32,7 @@ extern void gh_buf_init(gh_buf *buf, int initial_size);
  * existing buffer content will be preserved, but calling code must handle
  * that buffer was not expanded.
  */
-extern int gh_buf_try_grow(gh_buf *buf, int target_size, bool mark_oom);
+extern int strbuf_try_grow(strbuf *buf, int target_size, bool mark_oom);
 
 /**
  * Grow the buffer to hold at least `target_size` bytes.
@@ -42,71 +42,71 @@ extern int gh_buf_try_grow(gh_buf *buf, int target_size, bool mark_oom);
  *
  * @return 0 on success or -1 on failure
  */
-static inline int gh_buf_grow(gh_buf *buf, int target_size)
+static inline int strbuf_grow(strbuf *buf, int target_size)
 {
-	return gh_buf_try_grow(buf, target_size, true);
+	return strbuf_try_grow(buf, target_size, true);
 }
 
-extern void gh_buf_free(gh_buf *buf);
-extern void gh_buf_swap(gh_buf *buf_a, gh_buf *buf_b);
+extern void strbuf_free(strbuf *buf);
+extern void strbuf_swap(strbuf *buf_a, strbuf *buf_b);
 
 /**
- * Test if there have been any reallocation failures with this gh_buf.
+ * Test if there have been any reallocation failures with this strbuf.
  *
- * Any function that writes to a gh_buf can fail due to memory allocation
- * issues.  If one fails, the gh_buf will be marked with an OOM error and
- * further calls to modify the buffer will fail.  Check gh_buf_oom() at the
+ * Any function that writes to a strbuf can fail due to memory allocation
+ * issues.  If one fails, the strbuf will be marked with an OOM error and
+ * further calls to modify the buffer will fail.  Check strbuf_oom() at the
  * end of your sequence and it will be true if you ran out of memory at any
  * point with that buffer.
  *
  * @return false if no error, true if allocation error
  */
-static inline bool gh_buf_oom(const gh_buf *buf)
+static inline bool strbuf_oom(const strbuf *buf)
 {
-	return (buf->ptr == gh_buf__oom);
+	return (buf->ptr == strbuf__oom);
 }
 
 
-static inline size_t gh_buf_len(const gh_buf *buf)
+static inline size_t strbuf_len(const strbuf *buf)
 {
 	return buf->size;
 }
 
-extern int gh_buf_cmp(const gh_buf *a, const gh_buf *b);
+extern int strbuf_cmp(const strbuf *a, const strbuf *b);
 
-extern void gh_buf_attach(gh_buf *buf, unsigned char *ptr, int asize);
-extern unsigned char *gh_buf_detach(gh_buf *buf);
-extern void gh_buf_copy_cstr(char *data, int datasize, const gh_buf *buf);
+extern void strbuf_attach(strbuf *buf, unsigned char *ptr, int asize);
+extern unsigned char *strbuf_detach(strbuf *buf);
+extern void strbuf_copy_cstr(char *data, int datasize, const strbuf *buf);
 
-static inline const char *gh_buf_cstr(const gh_buf *buf)
+static inline const char *strbuf_cstr(const strbuf *buf)
 {
 	return (char *)buf->ptr;
 }
 
-#define gh_buf_at(buf, n) ((buf)->ptr[n])
+#define strbuf_at(buf, n) ((buf)->ptr[n])
 
 /*
  * Functions below that return int value error codes will return 0 on
  * success or -1 on failure (which generally means an allocation failed).
- * Using a gh_buf where the allocation has failed with result in -1 from
+ * Using a strbuf where the allocation has failed will result in -1 from
  * all further calls using that buffer.  As a result, you can ignore the
  * return code of these functions and call them in a series then just call
- * gh_buf_oom at the end.
+ * strbuf_oom at the end.
  */
-extern int gh_buf_set(gh_buf *buf, const unsigned char *data, int len);
-extern int gh_buf_sets(gh_buf *buf, const char *string);
-extern int gh_buf_putc(gh_buf *buf, int c);
-extern int gh_buf_put(gh_buf *buf, const unsigned char *data, int len);
-extern int gh_buf_puts(gh_buf *buf, const char *string);
-extern int gh_buf_printf(gh_buf *buf, const char *format, ...)
+extern int strbuf_set(strbuf *buf, const unsigned char *data, int len);
+extern int strbuf_sets(strbuf *buf, const char *string);
+extern int strbuf_putc(strbuf *buf, int c);
+extern int strbuf_put(strbuf *buf, const unsigned char *data, int len);
+extern int strbuf_puts(strbuf *buf, const char *string);
+extern int strbuf_printf(strbuf *buf, const char *format, ...)
 	__attribute__((format (printf, 2, 3)));
-extern int gh_buf_vprintf(gh_buf *buf, const char *format, va_list ap);
-extern void gh_buf_clear(gh_buf *buf);
-
-int gh_buf_strchr(const gh_buf *buf, int c, int pos);
-int gh_buf_strrchr(const gh_buf *buf, int c, int pos);
-void gh_buf_drop(gh_buf *buf, int n);
-void gh_buf_truncate(gh_buf *buf, int len);
-void gh_buf_trim(gh_buf *buf);
+extern int strbuf_vprintf(strbuf *buf, const char *format, va_list ap);
+extern void strbuf_clear(strbuf *buf);
+
+int strbuf_strchr(const strbuf *buf, int c, int pos);
+int strbuf_strrchr(const strbuf *buf, int c, int pos);
+void strbuf_drop(strbuf *buf, int n);
+void strbuf_truncate(strbuf *buf, int len);
+void strbuf_trim(strbuf *buf);
 
 #endif
diff --git a/src/chunk.h b/src/chunk.h
index f3841ed..f37a2f3 100644
--- a/src/chunk.h
+++ b/src/chunk.h
@@ -78,12 +78,12 @@ static inline chunk chunk_dup(const chunk *ch, int pos, int len)
 	return c;
 }
 
-static inline chunk chunk_buf_detach(gh_buf *buf)
+static inline chunk chunk_buf_detach(strbuf *buf)
 {
 	chunk c;
 
 	c.len = buf->size;
-	c.data = gh_buf_detach(buf);
+	c.data = strbuf_detach(buf);
 	c.alloc = 1;
 
 	return c;
diff --git a/src/html/houdini.h b/src/html/houdini.h
index 31fe917..1e54d20 100644
--- a/src/html/houdini.h
+++ b/src/html/houdini.h
@@ -25,17 +25,17 @@ extern "C" {
 #define HOUDINI_ESCAPED_SIZE(x) (((x) * 12) / 10)
 #define HOUDINI_UNESCAPED_SIZE(x) (x)
 
-extern int houdini_escape_html(gh_buf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure);
-extern int houdini_unescape_html(gh_buf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_xml(gh_buf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_uri(gh_buf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_url(gh_buf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size);
-extern int houdini_unescape_uri(gh_buf *ob, const uint8_t *src, size_t size);
-extern int houdini_unescape_url(gh_buf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_js(gh_buf *ob, const uint8_t *src, size_t size);
-extern int houdini_unescape_js(gh_buf *ob, const uint8_t *src, size_t size);
+extern int houdini_escape_html(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_escape_html0(strbuf *ob, const uint8_t *src, size_t size, int secure);
+extern int houdini_unescape_html(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_escape_xml(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_escape_uri(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_escape_url(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_escape_href(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_unescape_uri(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_unescape_url(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_escape_js(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_unescape_js(strbuf *ob, const uint8_t *src, size_t size);
 
 #ifdef __cplusplus
 }
diff --git a/src/html/houdini_href_e.c b/src/html/houdini_href_e.c
index b2a7d79..12456ce 100644
--- a/src/html/houdini_href_e.c
+++ b/src/html/houdini_href_e.c
@@ -49,7 +49,7 @@ static const char HREF_SAFE[] = {
 };
 
 int
-houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size)
+houdini_escape_href(strbuf *ob, const uint8_t *src, size_t size)
 {
 	static const uint8_t hex_chars[] = "0123456789ABCDEF";
 	size_t  i = 0, org;
@@ -63,7 +63,7 @@ houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size)
 			i++;
 
 		if (likely(i > org))
-			gh_buf_put(ob, src + org, i - org);
+			strbuf_put(ob, src + org, i - org);
 
 		/* escaping */
 		if (i >= size)
@@ -73,14 +73,14 @@ houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size)
 		/* amp appears all the time in URLs, but needs
 		 * HTML-entity escaping to be inside an href */
 		case '&':
-			gh_buf_puts(ob, "&amp;");
+			strbuf_puts(ob, "&amp;");
 			break;
 
 		/* the single quote is a valid URL character
 		 * according to the standard; it needs HTML
 		 * entity escaping too */
 		case '\'':
-			gh_buf_puts(ob, "&#x27;");
+			strbuf_puts(ob, "&#x27;");
 			break;
 
 		/* the space can be escaped to %20 or a plus
@@ -89,7 +89,7 @@ houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size)
 		 * when building GET strings */
 #if 0
 		case ' ':
-			gh_buf_putc(ob, '+');
+			strbuf_putc(ob, '+');
 			break;
 #endif
 
@@ -97,7 +97,7 @@ houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size)
 		default:
 			hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
 			hex_str[2] = hex_chars[src[i] & 0xF];
-			gh_buf_put(ob, hex_str, 3);
+			strbuf_put(ob, hex_str, 3);
 		}
 
 		i++;
diff --git a/src/html/houdini_html_e.c b/src/html/houdini_html_e.c
index 95b6c41..f2e86fe 100644
--- a/src/html/houdini_html_e.c
+++ b/src/html/houdini_html_e.c
@@ -45,7 +45,7 @@ static const char *HTML_ESCAPES[] = {
 };
 
 int
-houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure)
+houdini_escape_html0(strbuf *ob, const uint8_t *src, size_t size, int secure)
 {
 	size_t  i = 0, org, esc = 0;
 
@@ -55,7 +55,7 @@ houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure)
 			i++;
 
 		if (i > org)
-			gh_buf_put(ob, src + org, i - org);
+			strbuf_put(ob, src + org, i - org);
 
 		/* escaping */
 		if (unlikely(i >= size))
@@ -63,9 +63,9 @@ houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure)
 
 		/* The forward slash is only escaped in secure mode */
 		if ((src[i] == '/' || src[i] == '\'') && !secure) {
-			gh_buf_putc(ob, src[i]);
+			strbuf_putc(ob, src[i]);
 		} else {
-			gh_buf_puts(ob, HTML_ESCAPES[esc]);
+			strbuf_puts(ob, HTML_ESCAPES[esc]);
 		}
 
 		i++;
@@ -75,7 +75,7 @@ houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure)
 }
 
 int
-houdini_escape_html(gh_buf *ob, const uint8_t *src, size_t size)
+houdini_escape_html(strbuf *ob, const uint8_t *src, size_t size)
 {
 	return houdini_escape_html0(ob, src, size, 1);
 }
diff --git a/src/html/html.c b/src/html/html.c
index 41b8fda..a9356dd 100644
--- a/src/html/html.c
+++ b/src/html/html.c
@@ -10,7 +10,7 @@
 
 // Functions to convert block and inline lists to HTML strings.
 
-static void escape_html(gh_buf *dest, const unsigned char *source, int length)
+static void escape_html(strbuf *dest, const unsigned char *source, int length)
 {
 	if (length < 0)
 		length = strlen((char *)source);
@@ -18,7 +18,7 @@ static void escape_html(gh_buf *dest, const unsigned char *source, int length)
 	houdini_escape_html0(dest, source, (size_t)length, 0);
 }
 
-static void escape_href(gh_buf *dest, const unsigned char *source, int length)
+static void escape_href(strbuf *dest, const unsigned char *source, int length)
 {
 	if (length < 0)
 		length = strlen((char *)source);
@@ -26,14 +26,14 @@ static void escape_href(gh_buf *dest, const unsigned char *source, int length)
 	houdini_escape_href(dest, source, (size_t)length);
 }
 
-static inline void cr(gh_buf *html)
+static inline void cr(strbuf *html)
 {
 	if (html->size && html->ptr[html->size - 1] != '\n')
-		gh_buf_putc(html, '\n');
+		strbuf_putc(html, '\n');
 }
 
 // Convert a block list to HTML.  Returns 0 on success, and sets result.
-void blocks_to_html(gh_buf *html, block *b, bool tight)
+void blocks_to_html(strbuf *html, block *b, bool tight)
 {
 	struct ListData *data;
 
@@ -48,25 +48,25 @@ void blocks_to_html(gh_buf *html, block *b, bool tight)
 					inlines_to_html(html, b->inline_content);
 				} else {
 					cr(html);
-					gh_buf_puts(html, "<p>");
+					strbuf_puts(html, "<p>");
 					inlines_to_html(html, b->inline_content);
-					gh_buf_puts(html, "</p>\n");
+					strbuf_puts(html, "</p>\n");
 				}
 				break;
 
 			case block_quote:
 				cr(html);
-				gh_buf_puts(html, "<blockquote>\n");
+				strbuf_puts(html, "<blockquote>\n");
 				blocks_to_html(html, b->children, false);
-				gh_buf_puts(html, "</blockquote>\n");
+				strbuf_puts(html, "</blockquote>\n");
 				break;
 
 			case list_item:
 				cr(html);
-				gh_buf_puts(html, "<li>");
+				strbuf_puts(html, "<li>");
 				blocks_to_html(html, b->children, tight);
-				gh_buf_trim(html); /* TODO: rtrim */
-				gh_buf_puts(html, "</li>\n");
+				strbuf_trim(html); /* TODO: rtrim */
+				strbuf_puts(html, "</li>\n");
 				break;
 
 			case list:
@@ -75,58 +75,58 @@ void blocks_to_html(gh_buf *html, block *b, bool tight)
 				data = &(b->attributes.list_data);
 
 				if (data->start > 1) {
-					gh_buf_printf(html, "<%s start=\"%d\">\n",
+					strbuf_printf(html, "<%s start=\"%d\">\n",
 							data->list_type == bullet ? "ul" : "ol",
 							data->start);
 				} else {
-					gh_buf_puts(html, data->list_type == bullet ? "<ul>\n" : "<ol>\n");
+					strbuf_puts(html, data->list_type == bullet ? "<ul>\n" : "<ol>\n");
 				}
 
 				blocks_to_html(html, b->children, data->tight);
-				gh_buf_puts(html, data->list_type == bullet ? "</ul>" : "</ol>");
-				gh_buf_putc(html, '\n');
+				strbuf_puts(html, data->list_type == bullet ? "</ul>" : "</ol>");
+				strbuf_putc(html, '\n');
 				break;
 
 			case atx_header:
 			case setext_header:
 				cr(html);
-				gh_buf_printf(html, "<h%d>", b->attributes.header_level);
+				strbuf_printf(html, "<h%d>", b->attributes.header_level);
 				inlines_to_html(html, b->inline_content);
-				gh_buf_printf(html, "</h%d>\n", b->attributes.header_level);
+				strbuf_printf(html, "</h%d>\n", b->attributes.header_level);
 				break;
 
 			case indented_code:
 			case fenced_code:
 				cr(html);
 
-				gh_buf_puts(html, "<pre");
+				strbuf_puts(html, "<pre");
 
 				if (b->tag == fenced_code) {
-					gh_buf *info = &b->attributes.fenced_code_data.info;
+					strbuf *info = &b->attributes.fenced_code_data.info;
 
-					if (gh_buf_len(info) > 0) {
-						int first_tag = gh_buf_strchr(info, ' ', 0);
+					if (strbuf_len(info) > 0) {
+						int first_tag = strbuf_strchr(info, ' ', 0);
 						if (first_tag < 0)
-							first_tag = gh_buf_len(info);
+							first_tag = strbuf_len(info);
 
 
-						gh_buf_puts(html, " class=\"");
+						strbuf_puts(html, " class=\"");
 						escape_html(html, info->ptr, first_tag);
-						gh_buf_putc(html, '"');
+						strbuf_putc(html, '"');
 					}
 				}
 
-				gh_buf_puts(html, "><code>");
+				strbuf_puts(html, "><code>");
 				escape_html(html, b->string_content.ptr, b->string_content.size);
-				gh_buf_puts(html, "</code></pre>\n");
+				strbuf_puts(html, "</code></pre>\n");
 				break;
 
 			case html_block:
-				gh_buf_put(html, b->string_content.ptr, b->string_content.size);
+				strbuf_put(html, b->string_content.ptr, b->string_content.size);
 				break;
 
 			case hrule:
-				gh_buf_puts(html, "<hr />\n");
+				strbuf_puts(html, "<hr />\n");
 				break;
 
 			case reference_def:
@@ -141,9 +141,9 @@ void blocks_to_html(gh_buf *html, block *b, bool tight)
 }
 
 // Convert an inline list to HTML.  Returns 0 on success, and sets result.
-void inlines_to_html(gh_buf *html, inl* ils)
+void inlines_to_html(strbuf *html, inl* ils)
 {
-	gh_buf scrap = GH_BUF_INIT;
+	strbuf scrap = GH_BUF_INIT;
 
 	while(ils != NULL) {
 		switch(ils->tag) {
@@ -152,70 +152,70 @@ void inlines_to_html(gh_buf *html, inl* ils)
 				break;
 
 			case INL_LINEBREAK:
-				gh_buf_puts(html, "<br />\n");
+				strbuf_puts(html, "<br />\n");
 				break;
 
 			case INL_SOFTBREAK:
-				gh_buf_putc(html, '\n');
+				strbuf_putc(html, '\n');
 				break;
 
 			case INL_CODE:
-				gh_buf_puts(html, "<code>");
+				strbuf_puts(html, "<code>");
 				escape_html(html, ils->content.literal.data, ils->content.literal.len);
-				gh_buf_puts(html, "</code>");
+				strbuf_puts(html, "</code>");
 				break;
 
 			case INL_RAW_HTML:
 			case INL_ENTITY:
-				gh_buf_put(html,
+				strbuf_put(html,
 						ils->content.literal.data,
 						ils->content.literal.len);
 				break;
 
 			case INL_LINK:
-				gh_buf_puts(html, "<a href=\"");
+				strbuf_puts(html, "<a href=\"");
 				if (ils->content.linkable.url)
 					escape_href(html, ils->content.linkable.url, -1);
 
 				if (ils->content.linkable.title) {
-					gh_buf_puts(html, "\" title=\"");
+					strbuf_puts(html, "\" title=\"");
 					escape_html(html, ils->content.linkable.title, -1);
 				}
 
-				gh_buf_puts(html, "\">");
+				strbuf_puts(html, "\">");
 				inlines_to_html(html, ils->content.inlines);
-				gh_buf_puts(html, "</a>");
+				strbuf_puts(html, "</a>");
 				break;
 
 			case INL_IMAGE:
-				gh_buf_puts(html, "<img src=\"");
+				strbuf_puts(html, "<img src=\"");
 				if (ils->content.linkable.url)
 					escape_href(html, ils->content.linkable.url, -1);
 
 				inlines_to_html(&scrap, ils->content.inlines);
-				gh_buf_puts(html, "\" alt=\"");
+				strbuf_puts(html, "\" alt=\"");
 				if (scrap.size)
 					escape_html(html, scrap.ptr, scrap.size);
-				gh_buf_clear(&scrap);
+				strbuf_clear(&scrap);
 
 				if (ils->content.linkable.title) {
-					gh_buf_puts(html, "\" title=\"");
+					strbuf_puts(html, "\" title=\"");
 					escape_html(html, ils->content.linkable.title, -1);
 				}
 
-				gh_buf_puts(html, "\"/>");
+				strbuf_puts(html, "\"/>");
 				break;
 
 			case INL_STRONG:
-				gh_buf_puts(html, "<strong>");
+				strbuf_puts(html, "<strong>");
 				inlines_to_html(html, ils->content.inlines);
-				gh_buf_puts(html, "</strong>");
+				strbuf_puts(html, "</strong>");
 				break;
 
 			case INL_EMPH:
-				gh_buf_puts(html, "<em>");
+				strbuf_puts(html, "<em>");
 				inlines_to_html(html, ils->content.inlines);
-				gh_buf_puts(html, "</em>");
+				strbuf_puts(html, "</em>");
 				break;
 		}
 		ils = ils->next;
diff --git a/src/inlines.c b/src/inlines.c
index 8e2e683..33973df 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -25,7 +25,7 @@ inline static void chunk_free(chunk *c);
 inline static void chunk_trim(chunk *c);
 
 inline static chunk chunk_literal(const char *data);
-inline static chunk chunk_buf_detach(gh_buf *buf);
+inline static chunk chunk_buf_detach(strbuf *buf);
 inline static chunk chunk_dup(const chunk *ch, int pos, int len);
 
 static inl *parse_chunk_inlines(chunk *chunk, reference** refmap);
@@ -33,10 +33,10 @@ static inl *parse_inlines_while(subject* subj, int (*f)(subject*));
 static int parse_inline(subject* subj, inl ** last);
 
 static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap);
-static void subject_from_buf(subject *e, gh_buf *buffer, reference** refmap);
+static void subject_from_buf(subject *e, strbuf *buffer, reference** refmap);
 static int subject_find_special_char(subject *subj);
 
-static void normalize_whitespace(gh_buf *s);
+static void normalize_whitespace(strbuf *s);
 
 extern void free_reference(reference *ref) {
 	free(ref->label);
@@ -62,13 +62,13 @@ extern void free_reference_map(reference **refmap) {
 // remove leading/trailing whitespace, case fold
 static unsigned char *normalize_reference(chunk *ref)
 {
-	gh_buf normalized = GH_BUF_INIT;
+	strbuf normalized = GH_BUF_INIT;
 
 	utf8proc_case_fold(&normalized, ref->data, ref->len);
-	gh_buf_trim(&normalized);
+	strbuf_trim(&normalized);
 	normalize_whitespace(&normalized);
 
-	return gh_buf_detach(&normalized);
+	return strbuf_detach(&normalized);
 }
 
 // Returns reference if refmap contains a reference with matching
@@ -218,7 +218,7 @@ inline static inl* append_inlines(inl* a, inl* b)
 	return a;
 }
 
-static void subject_from_buf(subject *e, gh_buf *buffer, reference** refmap)
+static void subject_from_buf(subject *e, strbuf *buffer, reference** refmap)
 {
 	e->input.data = buffer->ptr;
 	e->input.len = buffer->size;
@@ -309,7 +309,7 @@ static int scan_to_closing_backticks(subject* subj, int openticklength)
 
 // Destructively modify string, collapsing consecutive
 // space and newline characters into a single space.
-static void normalize_whitespace(gh_buf *s)
+static void normalize_whitespace(strbuf *s)
 {
 	bool last_char_was_space = false;
 	int r, w;
@@ -331,7 +331,7 @@ static void normalize_whitespace(gh_buf *s)
 		}
 	}
 
-	gh_buf_truncate(s, w);
+	strbuf_truncate(s, w);
 }
 
 // Parse backtick code section or raw backticks, return an inline.
@@ -346,10 +346,10 @@ static inl* handle_backticks(subject *subj)
 		subj->pos = startpos; // rewind
 		return make_str(openticks);
 	} else {
-		gh_buf buf = GH_BUF_INIT;
+		strbuf buf = GH_BUF_INIT;
 
-		gh_buf_set(&buf, subj->input.data + startpos, endpos - startpos - openticks.len);
-		gh_buf_trim(&buf);
+		strbuf_set(&buf, subj->input.data + startpos, endpos - startpos - openticks.len);
+		strbuf_trim(&buf);
 		normalize_whitespace(&buf);
 
 		return make_code(chunk_buf_detach(&buf));
@@ -569,7 +569,7 @@ static inl *make_str_with_entities(chunk *content)
 }
 
 // Destructively unescape a string: remove backslashes before punctuation chars.
-extern void unescape_buffer(gh_buf *buf)
+extern void unescape_buffer(strbuf *buf)
 {
 	int r, w;
 
@@ -580,14 +580,14 @@ extern void unescape_buffer(gh_buf *buf)
 		buf->ptr[w++] = buf->ptr[r];
 	}
 
-	gh_buf_truncate(buf, w);
+	strbuf_truncate(buf, w);
 }
 
 // Clean a URL: remove surrounding whitespace and surrounding <>,
 // and remove \ that escape punctuation.
 static unsigned char *clean_url(chunk *url, int is_email)
 {
-	gh_buf buf = GH_BUF_INIT;
+	strbuf buf = GH_BUF_INIT;
 
 	chunk_trim(url);
 
@@ -595,22 +595,22 @@ static unsigned char *clean_url(chunk *url, int is_email)
 		return NULL;
 
 	if (is_email)
-		gh_buf_puts(&buf, "mailto:");
+		strbuf_puts(&buf, "mailto:");
 
 	if (url->data[0] == '<' && url->data[url->len - 1] == '>') {
-		gh_buf_put(&buf, url->data + 1, url->len - 2);
+		strbuf_put(&buf, url->data + 1, url->len - 2);
 	} else {
-		gh_buf_put(&buf, url->data, url->len);
+		strbuf_put(&buf, url->data, url->len);
 	}
 
 	unescape_buffer(&buf);
-	return gh_buf_detach(&buf);
+	return strbuf_detach(&buf);
 }
 
 // Clean a title: remove surrounding quotes and remove \ that escape punctuation.
 static unsigned char *clean_title(chunk *title)
 {
-	gh_buf buf = GH_BUF_INIT;
+	strbuf buf = GH_BUF_INIT;
 	unsigned char first, last;
 
 	if (title->len == 0)
@@ -623,13 +623,13 @@ static unsigned char *clean_title(chunk *title)
 	if ((first == '\'' && last == '\'') ||
 		(first == '(' && last == ')') ||
 		(first == '"' && last == '"')) {
-		gh_buf_set(&buf, title->data + 1, title->len - 2);
+		strbuf_set(&buf, title->data + 1, title->len - 2);
 	} else {
-		gh_buf_set(&buf, title->data, title->len);
+		strbuf_set(&buf, title->data, title->len);
 	}
 
 	unescape_buffer(&buf);
-	return gh_buf_detach(&buf);
+	return strbuf_detach(&buf);
 }
 
 // Parse an autolink or HTML tag.
@@ -971,7 +971,7 @@ static int parse_inline(subject* subj, inl ** last)
 	return 1;
 }
 
-extern inl* parse_inlines(gh_buf *input, reference** refmap)
+extern inl* parse_inlines(strbuf *input, reference** refmap)
 {
 	subject subj;
 	subject_from_buf(&subj, input, refmap);
@@ -993,7 +993,7 @@ void spnl(subject* subj)
 // Modify refmap if a reference is encountered.
 // Return 0 if no reference found, otherwise position of subject
 // after reference is parsed.
-extern int parse_reference(gh_buf *input, reference** refmap)
+extern int parse_reference(strbuf *input, reference** refmap)
 {
 	subject subj;
 
diff --git a/src/main.c b/src/main.c
index e1abedc..7cf67e2 100644
--- a/src/main.c
+++ b/src/main.c
@@ -14,14 +14,14 @@ void print_usage()
 
 static void print_document(block *document, bool ast)
 {
-	gh_buf html = GH_BUF_INIT;
+	strbuf html = GH_BUF_INIT;
 
 	if (ast) {
 		print_blocks(document, 0);
 	} else {
 		blocks_to_html(&html, document, false);
 		printf("%s", html.ptr);
-		gh_buf_free(&html);
+		strbuf_free(&html);
 	}
 }
 
diff --git a/src/stmd.h b/src/stmd.h
index 4a3c399..2e86f3a 100644
--- a/src/stmd.h
+++ b/src/stmd.h
@@ -50,7 +50,7 @@ struct FencedCodeData {
   int               fence_length;
   int               fence_offset;
   char              fence_char;
-  gh_buf            info;
+  strbuf            info;
 };
 
 typedef struct Block {
@@ -76,7 +76,7 @@ typedef struct Block {
   struct Block*      last_child;
   struct Block*      parent;
   struct Block*      top;
-  gh_buf			 string_content;
+  strbuf			 string_content;
   inl*               inline_content;
   union  {
     struct ListData       list_data;
@@ -88,15 +88,15 @@ typedef struct Block {
   struct Block *     prev;
 } block;
 
-inl* parse_inlines(gh_buf *input, reference** refmap);
+inl* parse_inlines(strbuf *input, reference** refmap);
 void free_inlines(inl* e);
 
-int parse_reference(gh_buf *input, reference** refmap);
+int parse_reference(strbuf *input, reference** refmap);
 void free_reference(reference *ref);
 void free_reference_map(reference **refmap);
 
 void add_reference(reference** refmap, reference* ref);
-void unescape_buffer(gh_buf *buf);
+void unescape_buffer(strbuf *buf);
 
 extern block* make_document();
 extern block* add_child(block* parent,
@@ -109,9 +109,9 @@ extern block *stmd_parse_file(FILE *f);
 void print_inlines(inl* ils, int indent);
 void print_blocks(block* blk, int indent);
 
-void blocks_to_html(gh_buf *html, block *b, bool tight);
-void inlines_to_html(gh_buf *html, inl *b);
+void blocks_to_html(strbuf *html, block *b, bool tight);
+void inlines_to_html(strbuf *html, inl *b);
 
-void utf8proc_case_fold(gh_buf *dest, const unsigned char *str, int len);
+void utf8proc_case_fold(strbuf *dest, const unsigned char *str, int len);
 
 #endif
diff --git a/src/utf8.c b/src/utf8.c
index 32c78a4..cebd872 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -84,7 +84,7 @@ ssize_t utf8proc_iterate(const uint8_t *str, ssize_t str_len, int32_t *dst)
 	return length;
 }
 
-void utf8proc_encode_char(int32_t uc, gh_buf *buf)
+void utf8proc_encode_char(int32_t uc, strbuf *buf)
 {
 	unsigned char dst[4];
 	int len = 0;
@@ -119,10 +119,10 @@ void utf8proc_encode_char(int32_t uc, gh_buf *buf)
 		assert(false);
 	}
 
-	gh_buf_put(buf, dst, len);
+	strbuf_put(buf, dst, len);
 }
 
-void utf8proc_case_fold(gh_buf *dest, const unsigned char *str, int len)
+void utf8proc_case_fold(strbuf *dest, const unsigned char *str, int len)
 {
 	int32_t c;
 
-- 
cgit v1.2.3


From 647b15968c95ec268d6d728eea73756c7ba648a8 Mon Sep 17 00:00:00 2001
From: Vicent Marti <tanoku@gmail.com>
Date: Thu, 4 Sep 2014 18:42:49 +0200
Subject: Rename inl

---
 src/html/html.c |  2 +-
 src/inlines.c   | 88 ++++++++++++++++++++++++++++-----------------------------
 src/print.c     |  2 +-
 src/stmd.h      | 34 ++++++++++++++--------
 4 files changed, 68 insertions(+), 58 deletions(-)

(limited to 'src/inlines.c')

diff --git a/src/html/html.c b/src/html/html.c
index a9356dd..53521b8 100644
--- a/src/html/html.c
+++ b/src/html/html.c
@@ -141,7 +141,7 @@ void blocks_to_html(strbuf *html, block *b, bool tight)
 }
 
 // Convert an inline list to HTML.  Returns 0 on success, and sets result.
-void inlines_to_html(strbuf *html, inl* ils)
+void inlines_to_html(strbuf *html, struct inl* ils)
 {
 	strbuf scrap = GH_BUF_INIT;
 
diff --git a/src/inlines.c b/src/inlines.c
index 33973df..301125e 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -28,9 +28,9 @@ inline static chunk chunk_literal(const char *data);
 inline static chunk chunk_buf_detach(strbuf *buf);
 inline static chunk chunk_dup(const chunk *ch, int pos, int len);
 
-static inl *parse_chunk_inlines(chunk *chunk, reference** refmap);
-static inl *parse_inlines_while(subject* subj, int (*f)(subject*));
-static int parse_inline(subject* subj, inl ** last);
+static struct inl *parse_chunk_inlines(chunk *chunk, reference** refmap);
+static struct inl *parse_inlines_while(subject* subj, int (*f)(subject*));
+static int parse_inline(subject* subj, struct inl ** last);
 
 static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap);
 static void subject_from_buf(subject *e, strbuf *buffer, reference** refmap);
@@ -108,9 +108,9 @@ extern void add_reference(reference** refmap, reference* ref)
 	}
 }
 
-inline static inl* make_link_from_reference(inl* label, reference *ref)
+inline static struct inl* make_link_from_reference(struct inl* label, reference *ref)
 {
-	inl* e = (inl*) malloc(sizeof(inl));
+	struct inl* e = (struct inl*) malloc(sizeof(struct inl));
 	e->tag = INL_LINK;
 	e->content.linkable.label = label;
 	e->content.linkable.url   = strdup(ref->url);
@@ -120,9 +120,9 @@ inline static inl* make_link_from_reference(inl* label, reference *ref)
 }
 
 // Create an inline with a linkable string value.
-inline static inl* make_link(inl* label, chunk url, chunk title, int is_email)
+inline static struct inl* make_link(struct inl* label, chunk url, chunk title, int is_email)
 {
-	inl* e = (inl*) malloc(sizeof(inl));
+	struct inl* e = (struct inl*) malloc(sizeof(struct inl));
 	e->tag = INL_LINK;
 	e->content.linkable.label = label;
 	e->content.linkable.url   = clean_url(&url, is_email);
@@ -131,9 +131,9 @@ inline static inl* make_link(inl* label, chunk url, chunk title, int is_email)
 	return e;
 }
 
-inline static inl* make_inlines(int t, inl* contents)
+inline static struct inl* make_inlines(int t, struct inl* contents)
 {
-	inl* e = (inl*) malloc(sizeof(inl));
+	struct inl* e = (struct inl*) malloc(sizeof(struct inl));
 	e->tag = t;
 	e->content.inlines = contents;
 	e->next = NULL;
@@ -141,9 +141,9 @@ inline static inl* make_inlines(int t, inl* contents)
 }
 
 // Create an inline with a literal string value.
-inline static inl* make_literal(int t, chunk s)
+inline static struct inl* make_literal(int t, chunk s)
 {
-	inl* e = (inl*) malloc(sizeof(inl));
+	struct inl* e = (struct inl*) malloc(sizeof(struct inl));
 	e->tag = t;
 	e->content.literal = s;
 	e->next = NULL;
@@ -151,9 +151,9 @@ inline static inl* make_literal(int t, chunk s)
 }
 
 // Create an inline with no value.
-inline static inl* make_simple(int t)
+inline static struct inl* make_simple(int t)
 {
-	inl* e = (inl*) malloc(sizeof(inl));
+	struct inl* e = (struct inl*) malloc(sizeof(struct inl));
 	e->tag = t;
 	e->next = NULL;
 	return e;
@@ -170,9 +170,9 @@ inline static inl* make_simple(int t)
 #define make_strong(contents) make_inlines(INL_STRONG, contents)
 
 // Free an inline list.
-extern void free_inlines(inl* e)
+extern void free_inlines(struct inl* e)
 {
-	inl * next;
+	struct inl * next;
 	while (e != NULL) {
 		switch (e->tag){
 			case INL_STRING:
@@ -205,12 +205,12 @@ extern void free_inlines(inl* e)
 
 // Append inline list b to the end of inline list a.
 // Return pointer to head of new list.
-inline static inl* append_inlines(inl* a, inl* b)
+inline static struct inl* append_inlines(struct inl* a, struct inl* b)
 {
 	if (a == NULL) {  // NULL acts like an empty list
 		return b;
 	}
-	inl* cur = a;
+	struct inl* cur = a;
 	while (cur->next) {
 		cur = cur->next;
 	}
@@ -336,7 +336,7 @@ static void normalize_whitespace(strbuf *s)
 
 // Parse backtick code section or raw backticks, return an inline.
 // Assumes that the subject has a backtick at the current position.
-static inl* handle_backticks(subject *subj)
+static struct inl* handle_backticks(subject *subj)
 {
 	chunk openticks = take_while(subj, isbacktick);
 	int startpos = subj->pos;
@@ -382,15 +382,15 @@ static int scan_delims(subject* subj, char c, bool * can_open, bool * can_close)
 
 // Parse strong/emph or a fallback.
 // Assumes the subject has '_' or '*' at the current position.
-static inl* handle_strong_emph(subject* subj, char c)
+static struct inl* handle_strong_emph(subject* subj, char c)
 {
 	bool can_open, can_close;
-	inl * result = NULL;
-	inl ** last = malloc(sizeof(inl *));
-	inl * new;
-	inl * il;
-	inl * first_head = NULL;
-	inl * first_close = NULL;
+	struct inl * result = NULL;
+	struct inl ** last = malloc(sizeof(struct inl *));
+	struct inl * new;
+	struct inl * il;
+	struct inl * first_head = NULL;
+	struct inl * first_close = NULL;
 	int first_close_delims = 0;
 	int numdelims;
 
@@ -508,7 +508,7 @@ done:
 }
 
 // Parse backslash-escape or just a backslash, returning an inline.
-static inl* handle_backslash(subject *subj)
+static struct inl* handle_backslash(subject *subj)
 {
 	advance(subj);
 	unsigned char nextchar = peek_char(subj);
@@ -525,10 +525,10 @@ static inl* handle_backslash(subject *subj)
 
 // Parse an entity or a regular "&" string.
 // Assumes the subject has an '&' character at the current position.
-static inl* handle_entity(subject* subj)
+static struct inl* handle_entity(subject* subj)
 {
 	int match;
-	inl *result;
+	struct inl *result;
 	match = scan_entity(&subj->input, subj->pos);
 	if (match) {
 		result = make_entity(chunk_dup(&subj->input, subj->pos, match));
@@ -542,10 +542,10 @@ static inl* handle_entity(subject* subj)
 
 // Like make_str, but parses entities.
 // Returns an inline sequence consisting of str and entity elements.
-static inl *make_str_with_entities(chunk *content)
+static struct inl *make_str_with_entities(chunk *content)
 {
-	inl *result = NULL;
-	inl *new;
+	struct inl *result = NULL;
+	struct inl *new;
 	int searchpos;
 	char c;
 	subject subj;
@@ -634,7 +634,7 @@ static unsigned char *clean_title(chunk *title)
 
 // Parse an autolink or HTML tag.
 // Assumes the subject has a '<' character at the current position.
-static inl* handle_pointy_brace(subject* subj)
+static struct inl* handle_pointy_brace(subject* subj)
 {
 	int matchlen = 0;
 	chunk contents;
@@ -693,7 +693,7 @@ static inl* handle_pointy_brace(subject* subj)
 static int link_label(subject* subj, chunk *raw_label)
 {
 	int nestlevel = 0;
-	inl* tmp = NULL;
+	struct inl* tmp = NULL;
 	int startpos = subj->pos;
 
 	if (subj->label_nestlevel) {
@@ -751,10 +751,10 @@ static int link_label(subject* subj, chunk *raw_label)
 }
 
 // Parse a link or the link portion of an image, or return a fallback.
-static inl* handle_left_bracket(subject* subj)
+static struct inl* handle_left_bracket(subject* subj)
 {
-	inl *lab = NULL;
-	inl *result = NULL;
+	struct inl *lab = NULL;
+	struct inl *result = NULL;
 	reference *ref;
 	int n;
 	int sps;
@@ -838,7 +838,7 @@ static inl* handle_left_bracket(subject* subj)
 
 // Parse a hard or soft linebreak, returning an inline.
 // Assumes the subject has a newline at the current position.
-static inl* handle_newline(subject *subj)
+static struct inl* handle_newline(subject *subj)
 {
 	int nlpos = subj->pos;
 	// skip over newline
@@ -862,16 +862,16 @@ inline static int not_eof(subject* subj)
 }
 
 // Parse inlines while a predicate is satisfied.  Return inlines.
-extern inl* parse_inlines_while(subject* subj, int (*f)(subject*))
+extern struct inl* parse_inlines_while(subject* subj, int (*f)(subject*))
 {
-	inl* result = NULL;
-	inl** last = &result;
+	struct inl* result = NULL;
+	struct inl** last = &result;
 	while ((*f)(subj) && parse_inline(subj, last)) {
 	}
 	return result;
 }
 
-inl *parse_chunk_inlines(chunk *chunk, reference** refmap)
+struct inl *parse_chunk_inlines(chunk *chunk, reference** refmap)
 {
 	subject subj;
 	subject_from_chunk(&subj, chunk, refmap);
@@ -894,9 +894,9 @@ static int subject_find_special_char(subject *subj)
 // Parse an inline, advancing subject, and add it to last element.
 // Adjust tail to point to new last element of list.
 // Return 0 if no inline can be parsed, 1 otherwise.
-static int parse_inline(subject* subj, inl ** last)
+static int parse_inline(subject* subj, struct inl ** last)
 {
-	inl* new = NULL;
+	struct inl* new = NULL;
 	chunk contents;
 	unsigned char c;
 	int endpos;
@@ -971,7 +971,7 @@ static int parse_inline(subject* subj, inl ** last)
 	return 1;
 }
 
-extern inl* parse_inlines(strbuf *input, reference** refmap)
+extern struct inl* parse_inlines(strbuf *input, reference** refmap)
 {
 	subject subj;
 	subject_from_buf(&subj, input, refmap);
diff --git a/src/print.c b/src/print.c
index 832ad4f..63f63c8 100644
--- a/src/print.c
+++ b/src/print.c
@@ -117,7 +117,7 @@ extern void print_blocks(block* b, int indent)
 }
 
 // Prettyprint an inline list, for debugging.
-extern void print_inlines(inl* ils, int indent)
+extern void print_inlines(struct inl* ils, int indent)
 {
 	while(ils != NULL) {
 		for (int i=0; i < indent; i++) {
diff --git a/src/stmd.h b/src/stmd.h
index 2e86f3a..9ed33ec 100644
--- a/src/stmd.h
+++ b/src/stmd.h
@@ -10,20 +10,30 @@
 #define VERSION "0.1"
 #define CODE_INDENT 4
 
-typedef struct Inline {
-	enum { INL_STRING, INL_SOFTBREAK, INL_LINEBREAK, INL_CODE, INL_RAW_HTML, INL_ENTITY,
-		INL_EMPH, INL_STRONG, INL_LINK, INL_IMAGE } tag;
+struct inl {
+	enum {
+		INL_STRING,
+		INL_SOFTBREAK,
+		INL_LINEBREAK,
+		INL_CODE,
+		INL_RAW_HTML,
+		INL_ENTITY,
+		INL_EMPH,
+		INL_STRONG,
+		INL_LINK,
+		INL_IMAGE
+	} tag;
 	union {
 		chunk literal;
-		struct Inline *inlines;
+		struct inl *inlines;
 		struct {
-			struct Inline *label;
+			struct inl *label;
 			unsigned char *url;
 			unsigned char *title;
 		} linkable;
 	} content;
-	struct Inline *next;
-} inl;
+	struct inl *next;
+};
 
 typedef struct Reference {
   unsigned char *label;
@@ -77,7 +87,7 @@ typedef struct Block {
   struct Block*      parent;
   struct Block*      top;
   strbuf			 string_content;
-  inl*               inline_content;
+  struct inl*               inline_content;
   union  {
     struct ListData       list_data;
     struct FencedCodeData fenced_code_data;
@@ -88,8 +98,8 @@ typedef struct Block {
   struct Block *     prev;
 } block;
 
-inl* parse_inlines(strbuf *input, reference** refmap);
-void free_inlines(inl* e);
+struct inl* parse_inlines(strbuf *input, reference** refmap);
+void free_inlines(struct inl* e);
 
 int parse_reference(strbuf *input, reference** refmap);
 void free_reference(reference *ref);
@@ -106,11 +116,11 @@ void free_blocks(block* e);
 extern block *stmd_parse_document(const unsigned char *buffer, size_t len);
 extern block *stmd_parse_file(FILE *f);
 
-void print_inlines(inl* ils, int indent);
+void print_inlines(struct inl* ils, int indent);
 void print_blocks(block* blk, int indent);
 
 void blocks_to_html(strbuf *html, block *b, bool tight);
-void inlines_to_html(strbuf *html, inl *b);
+void inlines_to_html(strbuf *html, struct inl *b);
 
 void utf8proc_case_fold(strbuf *dest, const unsigned char *str, int len);
 
-- 
cgit v1.2.3


From 9e4855365b920c2a80b0f1ab6937280f0b504334 Mon Sep 17 00:00:00 2001
From: Vicent Marti <tanoku@gmail.com>
Date: Thu, 4 Sep 2014 18:45:44 +0200
Subject: Rename `inl`

---
 src/html/html.c |  2 +-
 src/inlines.c   | 88 ++++++++++++++++++++++++++++-----------------------------
 src/print.c     |  2 +-
 src/stmd.h      | 28 ++++++++++--------
 4 files changed, 62 insertions(+), 58 deletions(-)

(limited to 'src/inlines.c')

diff --git a/src/html/html.c b/src/html/html.c
index 53521b8..a7bb21a 100644
--- a/src/html/html.c
+++ b/src/html/html.c
@@ -141,7 +141,7 @@ void blocks_to_html(strbuf *html, block *b, bool tight)
 }
 
 // Convert an inline list to HTML.  Returns 0 on success, and sets result.
-void inlines_to_html(strbuf *html, struct inl* ils)
+void inlines_to_html(strbuf *html, node_inl* ils)
 {
 	strbuf scrap = GH_BUF_INIT;
 
diff --git a/src/inlines.c b/src/inlines.c
index 301125e..6bb89da 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -28,9 +28,9 @@ inline static chunk chunk_literal(const char *data);
 inline static chunk chunk_buf_detach(strbuf *buf);
 inline static chunk chunk_dup(const chunk *ch, int pos, int len);
 
-static struct inl *parse_chunk_inlines(chunk *chunk, reference** refmap);
-static struct inl *parse_inlines_while(subject* subj, int (*f)(subject*));
-static int parse_inline(subject* subj, struct inl ** last);
+static node_inl *parse_chunk_inlines(chunk *chunk, reference** refmap);
+static node_inl *parse_inlines_while(subject* subj, int (*f)(subject*));
+static int parse_inline(subject* subj, node_inl ** last);
 
 static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap);
 static void subject_from_buf(subject *e, strbuf *buffer, reference** refmap);
@@ -108,9 +108,9 @@ extern void add_reference(reference** refmap, reference* ref)
 	}
 }
 
-inline static struct inl* make_link_from_reference(struct inl* label, reference *ref)
+inline static node_inl* make_link_from_reference(node_inl* label, reference *ref)
 {
-	struct inl* e = (struct inl*) malloc(sizeof(struct inl));
+	node_inl* e = (node_inl*) malloc(sizeof(node_inl));
 	e->tag = INL_LINK;
 	e->content.linkable.label = label;
 	e->content.linkable.url   = strdup(ref->url);
@@ -120,9 +120,9 @@ inline static struct inl* make_link_from_reference(struct inl* label, reference
 }
 
 // Create an inline with a linkable string value.
-inline static struct inl* make_link(struct inl* label, chunk url, chunk title, int is_email)
+inline static node_inl* make_link(node_inl* label, chunk url, chunk title, int is_email)
 {
-	struct inl* e = (struct inl*) malloc(sizeof(struct inl));
+	node_inl* e = (node_inl*) malloc(sizeof(node_inl));
 	e->tag = INL_LINK;
 	e->content.linkable.label = label;
 	e->content.linkable.url   = clean_url(&url, is_email);
@@ -131,9 +131,9 @@ inline static struct inl* make_link(struct inl* label, chunk url, chunk title, i
 	return e;
 }
 
-inline static struct inl* make_inlines(int t, struct inl* contents)
+inline static node_inl* make_inlines(int t, node_inl* contents)
 {
-	struct inl* e = (struct inl*) malloc(sizeof(struct inl));
+	node_inl* e = (node_inl*) malloc(sizeof(node_inl));
 	e->tag = t;
 	e->content.inlines = contents;
 	e->next = NULL;
@@ -141,9 +141,9 @@ inline static struct inl* make_inlines(int t, struct inl* contents)
 }
 
 // Create an inline with a literal string value.
-inline static struct inl* make_literal(int t, chunk s)
+inline static node_inl* make_literal(int t, chunk s)
 {
-	struct inl* e = (struct inl*) malloc(sizeof(struct inl));
+	node_inl* e = (node_inl*) malloc(sizeof(node_inl));
 	e->tag = t;
 	e->content.literal = s;
 	e->next = NULL;
@@ -151,9 +151,9 @@ inline static struct inl* make_literal(int t, chunk s)
 }
 
 // Create an inline with no value.
-inline static struct inl* make_simple(int t)
+inline static node_inl* make_simple(int t)
 {
-	struct inl* e = (struct inl*) malloc(sizeof(struct inl));
+	node_inl* e = (node_inl*) malloc(sizeof(node_inl));
 	e->tag = t;
 	e->next = NULL;
 	return e;
@@ -170,9 +170,9 @@ inline static struct inl* make_simple(int t)
 #define make_strong(contents) make_inlines(INL_STRONG, contents)
 
 // Free an inline list.
-extern void free_inlines(struct inl* e)
+extern void free_inlines(node_inl* e)
 {
-	struct inl * next;
+	node_inl * next;
 	while (e != NULL) {
 		switch (e->tag){
 			case INL_STRING:
@@ -205,12 +205,12 @@ extern void free_inlines(struct inl* e)
 
 // Append inline list b to the end of inline list a.
 // Return pointer to head of new list.
-inline static struct inl* append_inlines(struct inl* a, struct inl* b)
+inline static node_inl* append_inlines(node_inl* a, node_inl* b)
 {
 	if (a == NULL) {  // NULL acts like an empty list
 		return b;
 	}
-	struct inl* cur = a;
+	node_inl* cur = a;
 	while (cur->next) {
 		cur = cur->next;
 	}
@@ -336,7 +336,7 @@ static void normalize_whitespace(strbuf *s)
 
 // Parse backtick code section or raw backticks, return an inline.
 // Assumes that the subject has a backtick at the current position.
-static struct inl* handle_backticks(subject *subj)
+static node_inl* handle_backticks(subject *subj)
 {
 	chunk openticks = take_while(subj, isbacktick);
 	int startpos = subj->pos;
@@ -382,15 +382,15 @@ static int scan_delims(subject* subj, char c, bool * can_open, bool * can_close)
 
 // Parse strong/emph or a fallback.
 // Assumes the subject has '_' or '*' at the current position.
-static struct inl* handle_strong_emph(subject* subj, char c)
+static node_inl* handle_strong_emph(subject* subj, char c)
 {
 	bool can_open, can_close;
-	struct inl * result = NULL;
-	struct inl ** last = malloc(sizeof(struct inl *));
-	struct inl * new;
-	struct inl * il;
-	struct inl * first_head = NULL;
-	struct inl * first_close = NULL;
+	node_inl * result = NULL;
+	node_inl ** last = malloc(sizeof(node_inl *));
+	node_inl * new;
+	node_inl * il;
+	node_inl * first_head = NULL;
+	node_inl * first_close = NULL;
 	int first_close_delims = 0;
 	int numdelims;
 
@@ -508,7 +508,7 @@ done:
 }
 
 // Parse backslash-escape or just a backslash, returning an inline.
-static struct inl* handle_backslash(subject *subj)
+static node_inl* handle_backslash(subject *subj)
 {
 	advance(subj);
 	unsigned char nextchar = peek_char(subj);
@@ -525,10 +525,10 @@ static struct inl* handle_backslash(subject *subj)
 
 // Parse an entity or a regular "&" string.
 // Assumes the subject has an '&' character at the current position.
-static struct inl* handle_entity(subject* subj)
+static node_inl* handle_entity(subject* subj)
 {
 	int match;
-	struct inl *result;
+	node_inl *result;
 	match = scan_entity(&subj->input, subj->pos);
 	if (match) {
 		result = make_entity(chunk_dup(&subj->input, subj->pos, match));
@@ -542,10 +542,10 @@ static struct inl* handle_entity(subject* subj)
 
 // Like make_str, but parses entities.
 // Returns an inline sequence consisting of str and entity elements.
-static struct inl *make_str_with_entities(chunk *content)
+static node_inl *make_str_with_entities(chunk *content)
 {
-	struct inl *result = NULL;
-	struct inl *new;
+	node_inl *result = NULL;
+	node_inl *new;
 	int searchpos;
 	char c;
 	subject subj;
@@ -634,7 +634,7 @@ static unsigned char *clean_title(chunk *title)
 
 // Parse an autolink or HTML tag.
 // Assumes the subject has a '<' character at the current position.
-static struct inl* handle_pointy_brace(subject* subj)
+static node_inl* handle_pointy_brace(subject* subj)
 {
 	int matchlen = 0;
 	chunk contents;
@@ -693,7 +693,7 @@ static struct inl* handle_pointy_brace(subject* subj)
 static int link_label(subject* subj, chunk *raw_label)
 {
 	int nestlevel = 0;
-	struct inl* tmp = NULL;
+	node_inl* tmp = NULL;
 	int startpos = subj->pos;
 
 	if (subj->label_nestlevel) {
@@ -751,10 +751,10 @@ static int link_label(subject* subj, chunk *raw_label)
 }
 
 // Parse a link or the link portion of an image, or return a fallback.
-static struct inl* handle_left_bracket(subject* subj)
+static node_inl* handle_left_bracket(subject* subj)
 {
-	struct inl *lab = NULL;
-	struct inl *result = NULL;
+	node_inl *lab = NULL;
+	node_inl *result = NULL;
 	reference *ref;
 	int n;
 	int sps;
@@ -838,7 +838,7 @@ static struct inl* handle_left_bracket(subject* subj)
 
 // Parse a hard or soft linebreak, returning an inline.
 // Assumes the subject has a newline at the current position.
-static struct inl* handle_newline(subject *subj)
+static node_inl* handle_newline(subject *subj)
 {
 	int nlpos = subj->pos;
 	// skip over newline
@@ -862,16 +862,16 @@ inline static int not_eof(subject* subj)
 }
 
 // Parse inlines while a predicate is satisfied.  Return inlines.
-extern struct inl* parse_inlines_while(subject* subj, int (*f)(subject*))
+extern node_inl* parse_inlines_while(subject* subj, int (*f)(subject*))
 {
-	struct inl* result = NULL;
-	struct inl** last = &result;
+	node_inl* result = NULL;
+	node_inl** last = &result;
 	while ((*f)(subj) && parse_inline(subj, last)) {
 	}
 	return result;
 }
 
-struct inl *parse_chunk_inlines(chunk *chunk, reference** refmap)
+node_inl *parse_chunk_inlines(chunk *chunk, reference** refmap)
 {
 	subject subj;
 	subject_from_chunk(&subj, chunk, refmap);
@@ -894,9 +894,9 @@ static int subject_find_special_char(subject *subj)
 // Parse an inline, advancing subject, and add it to last element.
 // Adjust tail to point to new last element of list.
 // Return 0 if no inline can be parsed, 1 otherwise.
-static int parse_inline(subject* subj, struct inl ** last)
+static int parse_inline(subject* subj, node_inl ** last)
 {
-	struct inl* new = NULL;
+	node_inl* new = NULL;
 	chunk contents;
 	unsigned char c;
 	int endpos;
@@ -971,7 +971,7 @@ static int parse_inline(subject* subj, struct inl ** last)
 	return 1;
 }
 
-extern struct inl* parse_inlines(strbuf *input, reference** refmap)
+extern node_inl* parse_inlines(strbuf *input, reference** refmap)
 {
 	subject subj;
 	subject_from_buf(&subj, input, refmap);
diff --git a/src/print.c b/src/print.c
index 63f63c8..01e9136 100644
--- a/src/print.c
+++ b/src/print.c
@@ -117,7 +117,7 @@ extern void print_blocks(block* b, int indent)
 }
 
 // Prettyprint an inline list, for debugging.
-extern void print_inlines(struct inl* ils, int indent)
+extern void print_inlines(node_inl* ils, int indent)
 {
 	while(ils != NULL) {
 		for (int i=0; i < indent; i++) {
diff --git a/src/stmd.h b/src/stmd.h
index 9ed33ec..dbc8c8c 100644
--- a/src/stmd.h
+++ b/src/stmd.h
@@ -10,7 +10,7 @@
 #define VERSION "0.1"
 #define CODE_INDENT 4
 
-struct inl {
+struct node_inl {
 	enum {
 		INL_STRING,
 		INL_SOFTBREAK,
@@ -25,22 +25,26 @@ struct inl {
 	} tag;
 	union {
 		chunk literal;
-		struct inl *inlines;
+		struct node_inl *inlines;
 		struct {
-			struct inl *label;
+			struct node_inl *label;
 			unsigned char *url;
 			unsigned char *title;
 		} linkable;
 	} content;
-	struct inl *next;
+	struct node_inl *next;
 };
 
-typedef struct Reference {
+typedef struct node_inl node_inl;
+
+struct reference {
   unsigned char *label;
   unsigned char *url;
   unsigned char *title;
-  UT_hash_handle  hh;      // used by uthash
-} reference;
+  UT_hash_handle  hh; // used by uthash
+};
+
+typedef struct reference reference;
 
 // Types for blocks
 
@@ -87,7 +91,7 @@ typedef struct Block {
   struct Block*      parent;
   struct Block*      top;
   strbuf			 string_content;
-  struct inl*               inline_content;
+  node_inl*               inline_content;
   union  {
     struct ListData       list_data;
     struct FencedCodeData fenced_code_data;
@@ -98,8 +102,8 @@ typedef struct Block {
   struct Block *     prev;
 } block;
 
-struct inl* parse_inlines(strbuf *input, reference** refmap);
-void free_inlines(struct inl* e);
+node_inl* parse_inlines(strbuf *input, reference** refmap);
+void free_inlines(node_inl* e);
 
 int parse_reference(strbuf *input, reference** refmap);
 void free_reference(reference *ref);
@@ -116,11 +120,11 @@ void free_blocks(block* e);
 extern block *stmd_parse_document(const unsigned char *buffer, size_t len);
 extern block *stmd_parse_file(FILE *f);
 
-void print_inlines(struct inl* ils, int indent);
+void print_inlines(node_inl* ils, int indent);
 void print_blocks(block* blk, int indent);
 
 void blocks_to_html(strbuf *html, block *b, bool tight);
-void inlines_to_html(strbuf *html, struct inl *b);
+void inlines_to_html(strbuf *html, node_inl *b);
 
 void utf8proc_case_fold(strbuf *dest, const unsigned char *str, int len);
 
-- 
cgit v1.2.3


From add5dd1b9a9ba8c58cdc6ca0bb62d287acb56278 Mon Sep 17 00:00:00 2001
From: Vicent Marti <tanoku@gmail.com>
Date: Thu, 4 Sep 2014 19:40:27 +0200
Subject: Remove warnings

---
 Makefile        |  4 ++--
 src/inlines.c   | 17 +++++++++++++++--
 src/scanners.re | 41 ++++++++++++++++++++++++-----------------
 3 files changed, 41 insertions(+), 21 deletions(-)

(limited to 'src/inlines.c')

diff --git a/Makefile b/Makefile
index ed4ddd5..0d2eb8b 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
-CFLAGS=-g -O3 -Wall -Wextra -Wno-unused-variable -std=c99 -Isrc $(OPTFLAGS)
-LDFLAGS=-g -O3 -Wall -Wno-unused-variable # -Werror
+CFLAGS=-g -O3 -Wall -Wextra -std=c99 -Isrc $(OPTFLAGS)
+LDFLAGS=-g -O3 -Wall -Werror
 SRCDIR=src
 DATADIR=data
 
diff --git a/src/inlines.c b/src/inlines.c
index 6bb89da..5e0f3e5 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -108,13 +108,26 @@ extern void add_reference(reference** refmap, reference* ref)
 	}
 }
 
+static unsigned char *bufdup(const unsigned char *buf)
+{
+	unsigned char *new = NULL;
+
+	if (!buf) {
+		int len = strlen((char *)buf);
+		new = malloc(len + 1);
+		memcpy(new, buf, len + 1);
+	}
+
+	return new;
+}
+
 inline static node_inl* make_link_from_reference(node_inl* label, reference *ref)
 {
 	node_inl* e = (node_inl*) malloc(sizeof(node_inl));
 	e->tag = INL_LINK;
 	e->content.linkable.label = label;
-	e->content.linkable.url   = strdup(ref->url);
-	e->content.linkable.title = ref->title ? strdup(ref->title) : NULL;
+	e->content.linkable.url   = bufdup(ref->url);
+	e->content.linkable.title = bufdup(ref->title);
 	e->next = NULL;
 	return e;
 }
diff --git a/src/scanners.re b/src/scanners.re
index 71103f6..28aba9d 100644
--- a/src/scanners.re
+++ b/src/scanners.re
@@ -1,9 +1,5 @@
 #include <stdlib.h>
 
-#define SCAN_DATA \
-  const unsigned char *marker = NULL; \
-  const unsigned char *start = p; \
-
 /*!re2c
   re2c:define:YYCTYPE  = "unsigned char";
   re2c:define:YYCURSOR = p;
@@ -61,7 +57,8 @@
 // Try to match URI autolink after first <, returning number of chars matched.
 extern int _scan_autolink_uri(const unsigned char *p)
 {
-  SCAN_DATA;
+  const unsigned char *marker = NULL;
+  const unsigned char *start = p;
 /*!re2c
   scheme [:]([^\x00-\x20<>\\]|escaped_char)*[>]  { return (p - start); }
   .? { return 0; }
@@ -71,7 +68,8 @@ extern int _scan_autolink_uri(const unsigned char *p)
 // Try to match email autolink after first <, returning num of chars matched.
 extern int _scan_autolink_email(const unsigned char *p)
 {
-  SCAN_DATA;
+  const unsigned char *marker = NULL;
+  const unsigned char *start = p;
 /*!re2c
   [a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+
     [@]
@@ -85,7 +83,8 @@ extern int _scan_autolink_email(const unsigned char *p)
 // Try to match an HTML tag after first <, returning num of chars matched.
 extern int _scan_html_tag(const unsigned char *p)
 {
-  SCAN_DATA;
+  const unsigned char *marker = NULL;
+  const unsigned char *start = p;
 /*!re2c
   htmltag { return (p - start); }
   .? { return 0; }
@@ -96,7 +95,8 @@ extern int _scan_html_tag(const unsigned char *p)
 // returning num of chars matched.
 extern int _scan_html_block_tag(const unsigned char *p)
 {
-  SCAN_DATA;
+  const unsigned char *marker = NULL;
+  const unsigned char *start = p;
 /*!re2c
   [<] [/] blocktagname (spacechar | [>])  { return (p - start); }
   [<] blocktagname (spacechar | [/>]) { return (p - start); }
@@ -111,7 +111,8 @@ extern int _scan_html_block_tag(const unsigned char *p)
 // Newlines aren't ever allowed.
 extern int _scan_link_url(const unsigned char *p)
 {
-  SCAN_DATA;
+  const unsigned char *marker = NULL;
+  const unsigned char *start = p;
 /*!re2c
   [ \n]* [<] ([^<>\n\\\x00] | escaped_char | [\\])* [>] { return (p - start); }
   [ \n]* (reg_char+ | escaped_char | in_parens_nosp)* { return (p - start); }
@@ -124,7 +125,8 @@ extern int _scan_link_url(const unsigned char *p)
 // level of internal nesting (quotes within quotes).
 extern int _scan_link_title(const unsigned char *p)
 {
-  SCAN_DATA;
+  const unsigned char *marker = NULL;
+  const unsigned char *start = p;
 /*!re2c
   ["] (escaped_char|[^"\x00])* ["]   { return (p - start); }
   ['] (escaped_char|[^'\x00])* ['] { return (p - start); }
@@ -136,7 +138,7 @@ extern int _scan_link_title(const unsigned char *p)
 // Match space characters, including newlines.
 extern int _scan_spacechars(const unsigned char *p)
 {
-  SCAN_DATA;
+  const unsigned char *start = p; \
 /*!re2c
   [ \t\n]* { return (p - start); }
   . { return 0; }
@@ -146,7 +148,8 @@ extern int _scan_spacechars(const unsigned char *p)
 // Match ATX header start.
 extern int _scan_atx_header_start(const unsigned char *p)
 {
-  SCAN_DATA;
+  const unsigned char *marker = NULL;
+  const unsigned char *start = p;
 /*!re2c
   [#]{1,6} ([ ]+|[\n])  { return (p - start); }
   .? { return 0; }
@@ -157,7 +160,7 @@ extern int _scan_atx_header_start(const unsigned char *p)
 // 2 for level-2, 0 for no match.
 extern int _scan_setext_header_line(const unsigned char *p)
 {
-  SCAN_DATA;
+  const unsigned char *marker = NULL;
 /*!re2c
   [=]+ [ ]* [\n] { return 1; }
   [-]+ [ ]* [\n] { return 2; }
@@ -170,7 +173,8 @@ extern int _scan_setext_header_line(const unsigned char *p)
 // spaces between the hyphens or asterisks."
 extern int _scan_hrule(const unsigned char *p)
 {
-  SCAN_DATA;
+  const unsigned char *marker = NULL;
+  const unsigned char *start = p;
 /*!re2c
   ([*][ ]*){3,} [ \t]* [\n] { return (p - start); }
   ([_][ ]*){3,} [ \t]* [\n] { return (p - start); }
@@ -182,7 +186,8 @@ extern int _scan_hrule(const unsigned char *p)
 // Scan an opening code fence.
 extern int _scan_open_code_fence(const unsigned char *p)
 {
-  SCAN_DATA;
+  const unsigned char *marker = NULL;
+  const unsigned char *start = p;
 /*!re2c
   [`]{3,} / [^`\n\x00]*[\n] { return (p - start); }
   [~]{3,} / [^~\n\x00]*[\n] { return (p - start); }
@@ -193,7 +198,8 @@ extern int _scan_open_code_fence(const unsigned char *p)
 // Scan a closing code fence with length at least len.
 extern int _scan_close_code_fence(const unsigned char *p)
 {
-  SCAN_DATA;
+  const unsigned char *marker = NULL;
+  const unsigned char *start = p;
 /*!re2c
   ([`]{3,} | [~]{3,}) / spacechar* [\n] { return (p - start); }
   .? { return 0; }
@@ -204,7 +210,8 @@ extern int _scan_close_code_fence(const unsigned char *p)
 // Returns number of chars matched.
 extern int _scan_entity(const unsigned char *p)
 {
-  SCAN_DATA;
+  const unsigned char *marker = NULL;
+  const unsigned char *start = p;
 /*!re2c
   [&] ([#] ([Xx][A-Fa-f0-9]{1,8}|[0-9]{1,8}) |[A-Za-z][A-Za-z0-9]{1,31} ) [;]
      { return (p - start); }
-- 
cgit v1.2.3


From 278b89d092cae8fe9cdd6346c69512886d36abbd Mon Sep 17 00:00:00 2001
From: Vicent Marti <tanoku@gmail.com>
Date: Thu, 4 Sep 2014 20:04:21 +0200
Subject: Silly me

---
 src/inlines.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'src/inlines.c')

diff --git a/src/inlines.c b/src/inlines.c
index 5e0f3e5..6b17027 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -112,7 +112,7 @@ static unsigned char *bufdup(const unsigned char *buf)
 {
 	unsigned char *new = NULL;
 
-	if (!buf) {
+	if (buf) {
 		int len = strlen((char *)buf);
 		new = malloc(len + 1);
 		memcpy(new, buf, len + 1);
-- 
cgit v1.2.3


From 61e3e606e64221eaa5cf3d83dc598d5a42818d10 Mon Sep 17 00:00:00 2001
From: Vicent Marti <tanoku@gmail.com>
Date: Sat, 6 Sep 2014 20:48:05 +0200
Subject: UTF8-aware detabbing and entity handling

---
 Makefile           | 13 ++++++-----
 src/blocks.c       | 35 ++++++------------------------
 src/html/houdini.h |  2 ++
 src/html/html.c    |  1 -
 src/inlines.c      | 63 ++++++++++++++++++++++--------------------------------
 src/print.c        |  5 -----
 src/stmd.h         |  3 ---
 src/utf8.c         | 59 ++++++++++++++++++++++++++++++++++++++++++++------
 8 files changed, 95 insertions(+), 86 deletions(-)

(limited to 'src/inlines.c')

diff --git a/Makefile b/Makefile
index 0d2eb8b..b5e487d 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
-CFLAGS=-g -O3 -Wall -Wextra -std=c99 -Isrc $(OPTFLAGS)
-LDFLAGS=-g -O3 -Wall -Werror
+CFLAGS=-g -pg -O3 -Wall -Wextra -std=c99 -Isrc $(OPTFLAGS)
+LDFLAGS=-g -pg -O3 -Wall -Werror
 SRCDIR=src
 DATADIR=data
 
@@ -41,11 +41,11 @@ testjs: spec.txt
 benchjs:
 	node js/bench.js ${BENCHINP}
 
-HTML_OBJ=$(SRCDIR)/html/html.o $(SRCDIR)/html/houdini_href_e.o $(SRCDIR)/html/houdini_html_e.o
+HTML_OBJ=$(SRCDIR)/html/html.o $(SRCDIR)/html/houdini_href_e.o $(SRCDIR)/html/houdini_html_e.o $(SRCDIR)/html/houdini_html_u.o
 STMD_OBJ=$(SRCDIR)/inlines.o $(SRCDIR)/buffer.o $(SRCDIR)/blocks.o $(SRCDIR)/scanners.c $(SRCDIR)/print.o $(SRCDIR)/utf8.o
 
-$(PROG): $(SRCDIR)/main.c $(HTML_OBJ) $(STMD_OBJ)
-	$(CC) $(LDFLAGS) -o $@ $^
+$(PROG): $(SRCDIR)/html/html_unescape.h $(SRCDIR)/case_fold_switch.inc $(HTML_OBJ) $(STMD_OBJ) $(SRCDIR)/main.c
+	$(CC) $(LDFLAGS) -o $@ $(HTML_OBJ) $(STMD_OBJ) $(SRCDIR)/main.c
 
 $(SRCDIR)/scanners.c: $(SRCDIR)/scanners.re
 	re2c --case-insensitive -bis $< > $@ || (rm $@ && false)
@@ -53,6 +53,9 @@ $(SRCDIR)/scanners.c: $(SRCDIR)/scanners.re
 $(SRCDIR)/case_fold_switch.inc: $(DATADIR)/CaseFolding-3.2.0.txt
 	perl mkcasefold.pl < $< > $@
 
+$(SRCDIR)/html/html_unescape.h: $(SRCDIR)/html/html_unescape.gperf
+	gperf -I -t -N find_entity -H hash_entity -K entity -C -l --null-strings -m5 $< > $@
+
 .PHONY: leakcheck clean fuzztest dingus upload
 
 dingus:
diff --git a/src/blocks.c b/src/blocks.c
index f671b5e..8c7d49c 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -5,6 +5,8 @@
 #include <ctype.h>
 
 #include "stmd.h"
+#include "utf8.h"
+#include "html/houdini.h"
 #include "scanners.h"
 #include "uthash.h"
 
@@ -184,7 +186,7 @@ static void finalize(node_block* b, int line_number)
 			firstlinelen = strbuf_strchr(&b->string_content, '\n', 0);
 
 			strbuf_init(&b->attributes.fenced_code_data.info, 0);
-			strbuf_set(
+			houdini_unescape_html_f(
 				&b->attributes.fenced_code_data.info,
 				b->string_content.ptr,
 				firstlinelen
@@ -369,31 +371,6 @@ static int lists_match(struct ListData list_data,
 			list_data.bullet_char == item_data.bullet_char);
 }
 
-static void expand_tabs(strbuf *ob, const unsigned char *line, size_t size)
-{
-	size_t  i = 0, tab = 0;
-
-	while (i < size) {
-		size_t org = i;
-
-		while (i < size && line[i] != '\t') {
-			i++; tab++;
-		}
-
-		if (i > org)
-			strbuf_put(ob, line + org, i - org);
-
-		if (i >= size)
-			break;
-
-		do {
-			strbuf_putc(ob, ' '); tab++;
-		} while (tab % 4);
-
-		i++;
-	}
-}
-
 static node_block *finalize_document(node_block *document, int linenum)
 {
 	while (document != document->top) {
@@ -415,7 +392,7 @@ extern node_block *stmd_parse_file(FILE *f)
 	node_block *document = make_document();
 
 	while (fgets((char *)buffer, sizeof(buffer), f)) {
-		expand_tabs(&line, buffer, strlen((char *)buffer));
+		utf8proc_detab(&line, buffer, strlen((char *)buffer));
 		incorporate_line(&line, linenum, &document);
 		strbuf_clear(&line);
 		linenum++;
@@ -436,10 +413,10 @@ extern node_block *stmd_parse_document(const unsigned char *buffer, size_t len)
 		const unsigned char *eol = memchr(buffer, '\n', end - buffer);
 
 		if (!eol) {
-			expand_tabs(&line, buffer, end - buffer);
+			utf8proc_detab(&line, buffer, end - buffer);
 			buffer = end;
 		} else {
-			expand_tabs(&line, buffer, (eol - buffer) + 1);
+			utf8proc_detab(&line, buffer, (eol - buffer) + 1);
 			buffer += (eol - buffer) + 1;
 		}
 
diff --git a/src/html/houdini.h b/src/html/houdini.h
index 1e54d20..5fd690d 100644
--- a/src/html/houdini.h
+++ b/src/html/houdini.h
@@ -25,9 +25,11 @@ extern "C" {
 #define HOUDINI_ESCAPED_SIZE(x) (((x) * 12) / 10)
 #define HOUDINI_UNESCAPED_SIZE(x) (x)
 
+extern size_t houdini_unescape_ent(strbuf *ob, const uint8_t *src, size_t size);
 extern int houdini_escape_html(strbuf *ob, const uint8_t *src, size_t size);
 extern int houdini_escape_html0(strbuf *ob, const uint8_t *src, size_t size, int secure);
 extern int houdini_unescape_html(strbuf *ob, const uint8_t *src, size_t size);
+extern void houdini_unescape_html_f(strbuf *ob, const uint8_t *src, size_t size);
 extern int houdini_escape_xml(strbuf *ob, const uint8_t *src, size_t size);
 extern int houdini_escape_uri(strbuf *ob, const uint8_t *src, size_t size);
 extern int houdini_escape_url(strbuf *ob, const uint8_t *src, size_t size);
diff --git a/src/html/html.c b/src/html/html.c
index 758ec80..595dfcd 100644
--- a/src/html/html.c
+++ b/src/html/html.c
@@ -166,7 +166,6 @@ void inlines_to_html(strbuf *html, node_inl* ils)
 				break;
 
 			case INL_RAW_HTML:
-			case INL_ENTITY:
 				strbuf_put(html,
 						ils->content.literal.data,
 						ils->content.literal.len);
diff --git a/src/inlines.c b/src/inlines.c
index 6b17027..7b27150 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -5,6 +5,8 @@
 #include <ctype.h>
 
 #include "stmd.h"
+#include "html/houdini.h"
+#include "utf8.h"
 #include "uthash.h"
 #include "scanners.h"
 
@@ -176,7 +178,6 @@ inline static node_inl* make_simple(int t)
 #define make_str(s) make_literal(INL_STRING, s)
 #define make_code(s) make_literal(INL_CODE, s)
 #define make_raw_html(s) make_literal(INL_RAW_HTML, s)
-#define make_entity(s) make_literal(INL_ENTITY, s)
 #define make_linebreak() make_simple(INL_LINEBREAK)
 #define make_softbreak() make_simple(INL_SOFTBREAK)
 #define make_emph(contents) make_inlines(INL_EMPH, contents)
@@ -191,7 +192,6 @@ extern void free_inlines(node_inl* e)
 			case INL_STRING:
 			case INL_RAW_HTML:
 			case INL_CODE:
-			case INL_ENTITY:
 				chunk_free(&e->content.literal);
 				break;
 			case INL_LINEBREAK:
@@ -540,45 +540,34 @@ static node_inl* handle_backslash(subject *subj)
 // Assumes the subject has an '&' character at the current position.
 static node_inl* handle_entity(subject* subj)
 {
-	int match;
-	node_inl *result;
-	match = scan_entity(&subj->input, subj->pos);
-	if (match) {
-		result = make_entity(chunk_dup(&subj->input, subj->pos, match));
-		subj->pos += match;
-	} else {
-		advance(subj);
-		result = make_str(chunk_literal("&"));
-	}
-	return result;
+	strbuf ent = GH_BUF_INIT;
+	size_t len;
+
+	advance(subj);
+
+	len = houdini_unescape_ent(&ent,
+		subj->input.data + subj->pos,
+		subj->input.len - subj->pos
+	);
+
+	if (len == 0)
+		return make_str(chunk_literal("&"));
+
+	subj->pos += len;
+	return make_str(chunk_buf_detach(&ent));
 }
 
 // Like make_str, but parses entities.
 // Returns an inline sequence consisting of str and entity elements.
 static node_inl *make_str_with_entities(chunk *content)
 {
-	node_inl *result = NULL;
-	node_inl *new;
-	int searchpos;
-	char c;
-	subject subj;
-
-	subject_from_chunk(&subj, content, NULL);
+	strbuf unescaped = GH_BUF_INIT;
 
-	while ((c = peek_char(&subj))) {
-		switch (c) {
-			case '&':
-				new = handle_entity(&subj);
-				break;
-			default:
-				searchpos = chunk_strchr(&subj.input, '&', subj.pos);
-				new = make_str(chunk_dup(&subj.input, subj.pos, searchpos - subj.pos));
-				subj.pos = searchpos;
-		}
-		result = append_inlines(result, new);
+	if (houdini_unescape_html(&unescaped, content->data, (size_t)content->len)) {
+		return make_str(chunk_buf_detach(&unescaped));
+	} else {
+		return make_str(*content);
 	}
-
-	return result;
 }
 
 // Destructively unescape a string: remove backslashes before punctuation chars.
@@ -611,9 +600,9 @@ static unsigned char *clean_url(chunk *url, int is_email)
 		strbuf_puts(&buf, "mailto:");
 
 	if (url->data[0] == '<' && url->data[url->len - 1] == '>') {
-		strbuf_put(&buf, url->data + 1, url->len - 2);
+		houdini_unescape_html_f(&buf, url->data + 1, url->len - 2);
 	} else {
-		strbuf_put(&buf, url->data, url->len);
+		houdini_unescape_html_f(&buf, url->data, url->len);
 	}
 
 	unescape_buffer(&buf);
@@ -636,9 +625,9 @@ static unsigned char *clean_title(chunk *title)
 	if ((first == '\'' && last == '\'') ||
 		(first == '(' && last == ')') ||
 		(first == '"' && last == '"')) {
-		strbuf_set(&buf, title->data + 1, title->len - 2);
+		houdini_unescape_html_f(&buf, title->data + 1, title->len - 2);
 	} else {
-		strbuf_set(&buf, title->data, title->len);
+		houdini_unescape_html_f(&buf, title->data, title->len);
 	}
 
 	unescape_buffer(&buf);
diff --git a/src/print.c b/src/print.c
index 0ff86fa..9240dac 100644
--- a/src/print.c
+++ b/src/print.c
@@ -145,11 +145,6 @@ extern void print_inlines(node_inl* ils, int indent)
 			print_str(ils->content.literal.data, ils->content.literal.len);
 			putchar('\n');
 			break;
-		case INL_ENTITY:
-			printf("entity ");
-			print_str(ils->content.literal.data, ils->content.literal.len);
-			putchar('\n');
-			break;
 		case INL_LINK:
 		case INL_IMAGE:
 			printf("%s url=", ils->tag == INL_LINK ? "link" : "image");
diff --git a/src/stmd.h b/src/stmd.h
index be65371..c80eeda 100644
--- a/src/stmd.h
+++ b/src/stmd.h
@@ -17,7 +17,6 @@ struct node_inl {
 		INL_LINEBREAK,
 		INL_CODE,
 		INL_RAW_HTML,
-		INL_ENTITY,
 		INL_EMPH,
 		INL_STRONG,
 		INL_LINK,
@@ -133,6 +132,4 @@ void print_blocks(node_block* blk, int indent);
 void blocks_to_html(strbuf *html, node_block *b, bool tight);
 void inlines_to_html(strbuf *html, node_inl *b);
 
-void utf8proc_case_fold(strbuf *dest, const unsigned char *str, int len);
-
 #endif
diff --git a/src/utf8.c b/src/utf8.c
index cebd872..12d7ba5 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -3,7 +3,7 @@
 #include <unistd.h>
 #include <assert.h>
 
-#include "stmd.h"
+#include "utf8.h"
 
 static const int8_t utf8proc_utf8class[256] = {
 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -23,6 +23,12 @@ static const int8_t utf8proc_utf8class[256] = {
 	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
 	4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0 };
 
+static void encode_unknown(strbuf *buf)
+{
+	static const unsigned char repl[] = {239, 191, 189}; // bytes EF BF BD: U+FFFD encoded as UTF-8
+	strbuf_put(buf, repl, 3); // append that three-byte marker to buf
+}
+
 ssize_t utf8proc_charlen(const uint8_t *str, ssize_t str_len)
 {
 	ssize_t length, i;
@@ -46,6 +52,46 @@ ssize_t utf8proc_charlen(const uint8_t *str, ssize_t str_len)
 	return length;
 }
 
+void utf8proc_detab(strbuf *ob, const unsigned char *line, size_t size)
+{
+	// Copy `size` bytes of `line` into `ob`, expanding tabs to 4-column stops.
+	static const unsigned char whitespace[] = "    ";
+	size_t i = 0, tab = 0; // i: input byte offset, tab: output column
+
+	while (i < size) {
+		size_t org = i;
+		// ASCII run only: every byte >= 0x80 must go through the UTF-8 path.
+		while (i < size && line[i] != '\t' && line[i] < 0x80) {
+			i++; tab++;
+		}
+		// Flush the ASCII run gathered above in a single append.
+		if (i > org)
+			strbuf_put(ob, line + org, i - org);
+
+		if (i >= size)
+			break;
+		// Here line[i] is either a tab or a byte >= 0x80.
+		if (line[i] == '\t') {
+			int numspaces = 4 - (tab % 4);
+			strbuf_put(ob, whitespace, numspaces);
+			i += 1;
+			tab += numspaces;
+		} else {
+			ssize_t charlen = utf8proc_charlen(line + i, size - i);
+			// Negative length: emit the replacement marker, skip one byte.
+			if (charlen < 0) {
+				encode_unknown(ob);
+				i++;
+			} else {
+				strbuf_put(ob, line + i, charlen);
+				i += charlen;
+			}
+			// A whole multi-byte sequence advances the column by one.
+			tab += 1;
+		}
+	}
+}
+
 ssize_t utf8proc_iterate(const uint8_t *str, ssize_t str_len, int32_t *dst)
 {
 	ssize_t length;
@@ -89,9 +135,9 @@ void utf8proc_encode_char(int32_t uc, strbuf *buf)
 	unsigned char dst[4];
 	int len = 0;
 
-	if (uc < 0x00) {
-		assert(false);
-	} else if (uc < 0x80) {
+	assert(uc >= 0);
+
+	if (uc < 0x80) {
 		dst[0] = uc;
 		len = 1;
 	} else if (uc < 0x800) {
@@ -116,7 +162,8 @@ void utf8proc_encode_char(int32_t uc, strbuf *buf)
 		dst[3] = 0x80 + (uc & 0x3F);
 		len = 4;
 	} else {
-		assert(false);
+		encode_unknown(buf);
+		return;
 	}
 
 	strbuf_put(buf, dst, len);
@@ -133,7 +180,7 @@ void utf8proc_case_fold(strbuf *dest, const unsigned char *str, int len)
 		ssize_t char_len = utf8proc_iterate(str, len, &c);
 
 		if (char_len < 0) {
-			bufpush(0xFFFD);
+			encode_unknown(dest);
 			continue;
 		}
 
-- 
cgit v1.2.3


From 798f58a2b614280201141b398c8e498cecc8ab5e Mon Sep 17 00:00:00 2001
From: Vicent Marti <tanoku@gmail.com>
Date: Sat, 6 Sep 2014 21:17:23 +0200
Subject: This is going well

---
 spec.txt      | 35 +++++++++++++++++++-----------
 src/inlines.c | 68 +++++++++++++++++++++++++++++++++++------------------------
 2 files changed, 64 insertions(+), 39 deletions(-)

(limited to 'src/inlines.c')

diff --git a/spec.txt b/spec.txt
index 616cb96..ebd6d98 100644
--- a/spec.txt
+++ b/spec.txt
@@ -3688,7 +3688,7 @@ raw HTML:
 .
 <http://google.com?find=\*>
 .
-<p><a href="http://google.com?find=\*">http://google.com?find=\*</a></p>
+<p><a href="http://google.com?find=%5C*">http://google.com?find=\*</a></p>
 .
 
 .
@@ -3727,25 +3727,37 @@ foo
 
 ## Entities
 
-Entities are parsed as entities, not as literal text, in all contexts
-except code spans and code blocks. Three kinds of entities are recognized.
+With the goal of making this standard as HTML-agnostic as possible, all valid HTML entities in any
+context are recognized as such and converted into their actual values (i.e. the UTF-8 characters representing
+the entity itself) before they are stored in the AST.
+
+This allows implementations that target HTML output to trivially escape the entities when generating HTML,
+and simplifies the job of implementations targeting other languages, as these will only need to handle the
+UTF-8 characters and need not be HTML-entity aware.
 
 [Named entities](#name-entities) <a id="named-entities"></a> consist of `&`
-+ a string of 2-32 alphanumerics beginning with a letter + `;`.
++ any of the valid HTML5 entity names + `;`. The [following document](http://www.whatwg.org/specs/web-apps/current-work/multipage/entities.json)
+is used as an authoritative source of the valid entity names and their corresponding codepoints.
+
+Conforming implementations that target HTML don't need to generate entities for all the valid
+named entities that exist, with the exception of `"` (`&quot;`), `&` (`&amp;`), `<` (`&lt;`) and `>` (`&gt;`),
+which always need to be written as entities for security reasons.
 
 .
 &nbsp; &amp; &copy; &AElig; &Dcaron; &frac34; &HilbertSpace; &DifferentialD; &ClockwiseContourIntegral;
 .
-<p>&nbsp; &amp; &copy; &AElig; &Dcaron; &frac34; &HilbertSpace; &DifferentialD; &ClockwiseContourIntegral;</p>
+<p>  &amp; © Æ Ď ¾ ℋ ⅆ ∲</p>
 .
 
 [Decimal entities](#decimal-entities) <a id="decimal-entities"></a>
-consist of `&#` + a string of 1--8 arabic digits + `;`.
+consist of `&#` + a string of 1--8 arabic digits + `;`. Again, these entities need to be recognised
+and transformed into their corresponding UTF-8 characters. Invalid Unicode code points will be written
+as the "unknown codepoint" character (`0xFFFD`).
 
 .
- &#35; &#1234; &#992; &#98765432;
+&#35; &#1234; &#992; &#98765432;
 .
-<p> &#35; &#1234; &#992; &#98765432;</p>
+<p># Ӓ Ϡ �</p>
 .
 
 [Hexadecimal entities](#hexadecimal-entities) <a id="hexadecimal-entities"></a>
@@ -3767,7 +3779,7 @@ Here are some nonentities:
 .
 
 Although HTML5 does accept some entities without a trailing semicolon
-(such as `&copy`), these are not recognized as entities here:
+(such as `&copy`), these are not recognized as entities here, because it makes the grammar too ambiguous:
 
 .
 &copy
@@ -3775,13 +3787,12 @@ Although HTML5 does accept some entities without a trailing semicolon
 <p>&amp;copy</p>
 .
 
-On the other hand, many strings that are not on the list of HTML5
-named entities are recognized as entities here:
+Strings that are not on the list of HTML5 named entities are not recognized as entities either:
 
 .
 &MadeUpEntity;
 .
-<p>&MadeUpEntity;</p>
+<p>&amp;MadeUpEntity;</p>
 .
 
 Entities are recognized in any context besides code spans or
diff --git a/src/inlines.c b/src/inlines.c
index 7b27150..aa0e13e 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -20,8 +20,9 @@ typedef struct Subject {
 reference* lookup_reference(reference** refmap, chunk *label);
 reference* make_reference(chunk *label, chunk *url, chunk *title);
 
-static unsigned char *clean_url(chunk *url, int is_email);
+static unsigned char *clean_url(chunk *url);
 static unsigned char *clean_title(chunk *title);
+static unsigned char *clean_autolink(chunk *url, int is_email);
 
 inline static void chunk_free(chunk *c);
 inline static void chunk_trim(chunk *c);
@@ -91,7 +92,7 @@ extern reference* make_reference(chunk *label, chunk *url, chunk *title)
 	reference *ref;
 	ref = malloc(sizeof(reference));
 	ref->label = normalize_reference(label);
-	ref->url = clean_url(url, 0);
+	ref->url = clean_url(url);
 	ref->title = clean_title(title);
 	return ref;
 }
@@ -123,27 +124,31 @@ static unsigned char *bufdup(const unsigned char *buf)
 	return new;
 }
 
-inline static node_inl* make_link_from_reference(node_inl* label, reference *ref)
+static inline node_inl *make_link_(node_inl *label, unsigned char *url, unsigned char *title)
 {
 	node_inl* e = (node_inl*) malloc(sizeof(node_inl));
 	e->tag = INL_LINK;
 	e->content.linkable.label = label;
-	e->content.linkable.url   = bufdup(ref->url);
-	e->content.linkable.title = bufdup(ref->title);
+	e->content.linkable.url   = url;
+	e->content.linkable.title = title;
 	e->next = NULL;
 	return e;
 }
 
+inline static node_inl* make_ref_link(node_inl* label, reference *ref)
+{
+	return make_link_(label, bufdup(ref->url), bufdup(ref->title));
+}
+
+inline static node_inl* make_autolink(node_inl* label, chunk url, int is_email)
+{
+	return make_link_(label, clean_autolink(&url, is_email), NULL);
+}
+
 // Create an inline with a linkable string value.
-inline static node_inl* make_link(node_inl* label, chunk url, chunk title, int is_email)
+inline static node_inl* make_link(node_inl* label, chunk url, chunk title)
 {
-	node_inl* e = (node_inl*) malloc(sizeof(node_inl));
-	e->tag = INL_LINK;
-	e->content.linkable.label = label;
-	e->content.linkable.url   = clean_url(&url, is_email);
-	e->content.linkable.title = clean_title(&title);
-	e->next = NULL;
-	return e;
+	return make_link_(label, clean_url(&url), clean_title(&title));
 }
 
 inline static node_inl* make_inlines(int t, node_inl* contents)
@@ -587,7 +592,7 @@ extern void unescape_buffer(strbuf *buf)
 
 // Clean a URL: remove surrounding whitespace and surrounding <>,
 // and remove \ that escape punctuation.
-static unsigned char *clean_url(chunk *url, int is_email)
+static unsigned char *clean_url(chunk *url)
 {
 	strbuf buf = GH_BUF_INIT;
 
@@ -596,9 +601,6 @@ static unsigned char *clean_url(chunk *url, int is_email)
 	if (url->len == 0)
 		return NULL;
 
-	if (is_email)
-		strbuf_puts(&buf, "mailto:");
-
 	if (url->data[0] == '<' && url->data[url->len - 1] == '>') {
 		houdini_unescape_html_f(&buf, url->data + 1, url->len - 2);
 	} else {
@@ -609,6 +611,22 @@ static unsigned char *clean_url(chunk *url, int is_email)
 	return strbuf_detach(&buf);
 }
 
+static unsigned char *clean_autolink(chunk *url, int is_email)
+{
+	strbuf buf = GH_BUF_INIT;
+
+	chunk_trim(url);
+
+	if (url->len == 0)
+		return NULL;
+
+	if (is_email)
+		strbuf_puts(&buf, "mailto:");
+
+	houdini_unescape_html_f(&buf, url->data, url->len);
+	return strbuf_detach(&buf);
+}
+
 // Clean a title: remove surrounding quotes and remove \ that escape punctuation.
 static unsigned char *clean_title(chunk *title)
 {
@@ -649,11 +667,9 @@ static node_inl* handle_pointy_brace(subject* subj)
 		contents = chunk_dup(&subj->input, subj->pos, matchlen - 1);
 		subj->pos += matchlen;
 
-		return make_link(
+		return make_autolink(
 			make_str_with_entities(&contents),
-			contents,
-			chunk_literal(""),
-			0
+			contents, 0
 		);
 	}
 
@@ -663,11 +679,9 @@ static node_inl* handle_pointy_brace(subject* subj)
 		contents = chunk_dup(&subj->input, subj->pos, matchlen - 1);
 		subj->pos += matchlen;
 
-		return make_link(
+		return make_autolink(
 				make_str_with_entities(&contents),
-				contents,
-				chunk_literal(""),
-				1
+				contents, 1
 		);
 	}
 
@@ -792,7 +806,7 @@ static node_inl* handle_left_bracket(subject* subj)
 				title = chunk_dup(&subj->input, starttitle, endtitle - starttitle);
 				lab = parse_chunk_inlines(&rawlabel, NULL);
 
-				return make_link(lab, url, title, 0);
+				return make_link(lab, url, title);
 			} else {
 				// if we get here, we matched a label but didn't get further:
 				subj->pos = endlabel;
@@ -823,7 +837,7 @@ static node_inl* handle_left_bracket(subject* subj)
 			ref = lookup_reference(subj->reference_map, &reflabel);
 			if (ref != NULL) { // found
 				lab = parse_chunk_inlines(&rawlabel, NULL);
-				result = make_link_from_reference(lab, ref);
+				result = make_ref_link(lab, ref);
 			} else {
 				subj->pos = endlabel;
 				lab = parse_chunk_inlines(&rawlabel, subj->reference_map);
-- 
cgit v1.2.3


From 94a79a605f3e76a43f1f87a5044f6761b99e5ca5 Mon Sep 17 00:00:00 2001
From: Vicent Marti <tanoku@gmail.com>
Date: Wed, 10 Sep 2014 18:33:27 +0200
Subject: Cleanup reference implementation

---
 Makefile         |   2 +-
 src/blocks.c     |  16 ++---
 src/buffer.c     |  43 ++++++++++++++
 src/buffer.h     |   2 +
 src/inlines.c    | 176 +++++++------------------------------------------------
 src/references.c | 109 ++++++++++++++++++++++++++++++++++
 src/references.h |  27 +++++++++
 src/stmd.h       |  26 +++-----
 src/utf8.c       |  10 ++--
 src/utf8.h       |   5 +-
 10 files changed, 225 insertions(+), 191 deletions(-)
 create mode 100644 src/references.c
 create mode 100644 src/references.h

(limited to 'src/inlines.c')

diff --git a/Makefile b/Makefile
index 5d13272..11e2141 100644
--- a/Makefile
+++ b/Makefile
@@ -42,7 +42,7 @@ benchjs:
 	node js/bench.js ${BENCHINP}
 
 HTML_OBJ=$(SRCDIR)/html/html.o $(SRCDIR)/html/houdini_href_e.o $(SRCDIR)/html/houdini_html_e.o $(SRCDIR)/html/houdini_html_u.o
-STMD_OBJ=$(SRCDIR)/inlines.o $(SRCDIR)/buffer.o $(SRCDIR)/blocks.o $(SRCDIR)/scanners.c $(SRCDIR)/print.o $(SRCDIR)/utf8.o
+STMD_OBJ=$(SRCDIR)/inlines.o $(SRCDIR)/buffer.o $(SRCDIR)/blocks.o $(SRCDIR)/scanners.c $(SRCDIR)/print.o $(SRCDIR)/utf8.o $(SRCDIR)/references.c
 
 $(PROG): $(SRCDIR)/html/html_unescape.h $(SRCDIR)/case_fold_switch.inc $(HTML_OBJ) $(STMD_OBJ) $(SRCDIR)/main.c
 	$(CC) $(LDFLAGS) -o $@ $(HTML_OBJ) $(STMD_OBJ) $(SRCDIR)/main.c
diff --git a/src/blocks.c b/src/blocks.c
index 72b2dc2..30a8284 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -8,7 +8,6 @@
 #include "utf8.h"
 #include "html/houdini.h"
 #include "scanners.h"
-#include "uthash.h"
 
 #define peek_at(i, n) (i)->data[n]
 
@@ -36,12 +35,7 @@ static node_block* make_block(int tag, int start_line, int start_column)
 extern node_block* make_document()
 {
 	node_block *e = make_block(BLOCK_DOCUMENT, 1, 1);
-	reference *map = NULL;
-	reference ** refmap;
-
-	refmap = (reference**) malloc(sizeof(reference*));
-	*refmap = map;
-	e->as.document.refmap = refmap;
+	e->as.document.refmap = reference_map_new();
 	e->top = e;
 
 	return e;
@@ -164,7 +158,7 @@ static void finalize(node_block* b, int line_number)
 		case BLOCK_PARAGRAPH:
 			pos = 0;
 			while (strbuf_at(&b->string_content, 0) == '[' &&
-					(pos = parse_reference(&b->string_content, b->top->as.document.refmap))) {
+					(pos = parse_reference_inline(&b->string_content, b->top->as.document.refmap))) {
 
 				strbuf_drop(&b->string_content, pos);
 			}
@@ -192,7 +186,7 @@ static void finalize(node_block* b, int line_number)
 			strbuf_drop(&b->string_content, firstlinelen + 1);
 
 			strbuf_trim(&b->as.code.info);
-			unescape_buffer(&b->as.code.info);
+			strbuf_unescape(&b->as.code.info);
 			break;
 
 		case BLOCK_LIST: // determine tight/loose status
@@ -268,7 +262,7 @@ extern void free_blocks(node_block* e)
 		if (e->tag == BLOCK_FENCED_CODE) {
 			strbuf_free(&e->as.code.info);
 		} else if (e->tag == BLOCK_DOCUMENT) {
-			free_reference_map(e->as.document.refmap);
+			reference_map_free(e->as.document.refmap);
 		}
 		free_blocks(e->children);
 		free(e);
@@ -278,7 +272,7 @@ extern void free_blocks(node_block* e)
 
 // Walk through node_block and all children, recursively, parsing
 // string content into inline content where appropriate.
-void process_inlines(node_block* cur, reference** refmap)
+void process_inlines(node_block* cur, reference_map *refmap)
 {
 	switch (cur->tag) {
 		case BLOCK_PARAGRAPH:
diff --git a/src/buffer.c b/src/buffer.c
index 90c2186..cdf8ca0 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -308,3 +308,46 @@ void strbuf_trim(strbuf *buf)
 
 	buf->ptr[buf->size] = '\0';
 }
+
+// Destructively modify string, collapsing consecutive
+// space and newline characters into a single space.
+void strbuf_normalize_whitespace(strbuf *s)
+{
+	bool last_char_was_space = false;
+	int r, w;
+
+	for (r = 0, w = 0; r < s->size; ++r) {
+		switch (s->ptr[r]) {
+		case ' ':
+		case '\n':
+			if (last_char_was_space)
+				break;
+
+			s->ptr[w++] = ' ';
+			last_char_was_space = true;
+			break;
+
+		default:
+			s->ptr[w++] = s->ptr[r];
+			last_char_was_space = false;
+		}
+	}
+
+	strbuf_truncate(s, w);
+}
+
+// Destructively unescape a string: remove backslashes before punctuation chars.
+extern void strbuf_unescape(strbuf *buf)
+{
+	int r, w;
+
+	for (r = 0, w = 0; r < buf->size; ++r) {
+		if (buf->ptr[r] == '\\' && ispunct(buf->ptr[r + 1]))
+			continue;
+
+		buf->ptr[w++] = buf->ptr[r];
+	}
+
+	strbuf_truncate(buf, w);
+}
+
diff --git a/src/buffer.h b/src/buffer.h
index 6f45cbb..1bc1eee 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -108,5 +108,7 @@ int strbuf_strrchr(const strbuf *buf, int c, int pos);
 void strbuf_drop(strbuf *buf, int n);
 void strbuf_truncate(strbuf *buf, int len);
 void strbuf_trim(strbuf *buf);
+void strbuf_normalize_whitespace(strbuf *s);
+void strbuf_unescape(strbuf *s);
 
 #endif
diff --git a/src/inlines.c b/src/inlines.c
index aa0e13e..3040f09 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -7,110 +7,23 @@
 #include "stmd.h"
 #include "html/houdini.h"
 #include "utf8.h"
-#include "uthash.h"
 #include "scanners.h"
 
 typedef struct Subject {
 	chunk input;
 	int pos;
-	int            label_nestlevel;
-	reference**    reference_map;
+	int label_nestlevel;
+	reference_map *refmap;
 } subject;
 
-reference* lookup_reference(reference** refmap, chunk *label);
-reference* make_reference(chunk *label, chunk *url, chunk *title);
-
-static unsigned char *clean_url(chunk *url);
-static unsigned char *clean_title(chunk *title);
-static unsigned char *clean_autolink(chunk *url, int is_email);
-
-inline static void chunk_free(chunk *c);
-inline static void chunk_trim(chunk *c);
-
-inline static chunk chunk_literal(const char *data);
-inline static chunk chunk_buf_detach(strbuf *buf);
-inline static chunk chunk_dup(const chunk *ch, int pos, int len);
-
-static node_inl *parse_chunk_inlines(chunk *chunk, reference** refmap);
+static node_inl *parse_chunk_inlines(chunk *chunk, reference_map *refmap);
 static node_inl *parse_inlines_while(subject* subj, int (*f)(subject*));
 static int parse_inline(subject* subj, node_inl ** last);
 
-static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap);
-static void subject_from_buf(subject *e, strbuf *buffer, reference** refmap);
+static void subject_from_chunk(subject *e, chunk *chunk, reference_map *refmap);
+static void subject_from_buf(subject *e, strbuf *buffer, reference_map *refmap);
 static int subject_find_special_char(subject *subj);
 
-static void normalize_whitespace(strbuf *s);
-
-extern void free_reference(reference *ref) {
-	free(ref->label);
-	free(ref->url);
-	free(ref->title);
-	free(ref);
-}
-
-extern void free_reference_map(reference **refmap) {
-	/* free the hash table contents */
-	reference *s;
-	reference *tmp;
-	if (refmap != NULL) {
-		HASH_ITER(hh, *refmap, s, tmp) {
-			HASH_DEL(*refmap, s);
-			free_reference(s);
-		}
-		free(refmap);
-	}
-}
-
-// normalize reference:  collapse internal whitespace to single space,
-// remove leading/trailing whitespace, case fold
-static unsigned char *normalize_reference(chunk *ref)
-{
-	strbuf normalized = GH_BUF_INIT;
-
-	utf8proc_case_fold(&normalized, ref->data, ref->len);
-	strbuf_trim(&normalized);
-	normalize_whitespace(&normalized);
-
-	return strbuf_detach(&normalized);
-}
-
-// Returns reference if refmap contains a reference with matching
-// label, otherwise NULL.
-extern reference* lookup_reference(reference** refmap, chunk *label)
-{
-	reference *ref = NULL;
-	unsigned char *norm = normalize_reference(label);
-	if (refmap != NULL) {
-		HASH_FIND_STR(*refmap, (char*)norm, ref);
-	}
-	free(norm);
-	return ref;
-}
-
-extern reference* make_reference(chunk *label, chunk *url, chunk *title)
-{
-	reference *ref;
-	ref = malloc(sizeof(reference));
-	ref->label = normalize_reference(label);
-	ref->url = clean_url(url);
-	ref->title = clean_title(title);
-	return ref;
-}
-
-extern void add_reference(reference** refmap, reference* ref)
-{
-	reference * t = NULL;
-	const char *label = (const char *)ref->label;
-
-	HASH_FIND(hh, *refmap, label, strlen(label), t);
-
-	if (t == NULL) {
-		HASH_ADD_KEYPTR(hh, *refmap, label, strlen(label), ref);
-	} else {
-		free_reference(ref);  // we free this now since it won't be in the refmap
-	}
-}
-
 static unsigned char *bufdup(const unsigned char *buf)
 {
 	unsigned char *new = NULL;
@@ -236,26 +149,26 @@ inline static node_inl* append_inlines(node_inl* a, node_inl* b)
 	return a;
 }
 
-static void subject_from_buf(subject *e, strbuf *buffer, reference** refmap)
+static void subject_from_buf(subject *e, strbuf *buffer, reference_map *refmap)
 {
 	e->input.data = buffer->ptr;
 	e->input.len = buffer->size;
 	e->input.alloc = 0;
 	e->pos = 0;
 	e->label_nestlevel = 0;
-	e->reference_map = refmap;
+	e->refmap = refmap;
 
 	chunk_rtrim(&e->input);
 }
 
-static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap)
+static void subject_from_chunk(subject *e, chunk *chunk, reference_map *refmap)
 {
 	e->input.data = chunk->data;
 	e->input.len = chunk->len;
 	e->input.alloc = 0;
 	e->pos = 0;
 	e->label_nestlevel = 0;
-	e->reference_map = refmap;
+	e->refmap = refmap;
 
 	chunk_rtrim(&e->input);
 }
@@ -325,33 +238,6 @@ static int scan_to_closing_backticks(subject* subj, int openticklength)
 	return (subj->pos);
 }
 
-// Destructively modify string, collapsing consecutive
-// space and newline characters into a single space.
-static void normalize_whitespace(strbuf *s)
-{
-	bool last_char_was_space = false;
-	int r, w;
-
-	for (r = 0, w = 0; r < s->size; ++r) {
-		switch (s->ptr[r]) {
-		case ' ':
-		case '\n':
-			if (last_char_was_space)
-				break;
-
-			s->ptr[w++] = ' ';
-			last_char_was_space = true;
-			break;
-
-		default:
-			s->ptr[w++] = s->ptr[r];
-			last_char_was_space = false;
-		}
-	}
-
-	strbuf_truncate(s, w);
-}
-
 // Parse backtick code section or raw backticks, return an inline.
 // Assumes that the subject has a backtick at the current position.
 static node_inl* handle_backticks(subject *subj)
@@ -368,7 +254,7 @@ static node_inl* handle_backticks(subject *subj)
 
 		strbuf_set(&buf, subj->input.data + startpos, endpos - startpos - openticks.len);
 		strbuf_trim(&buf);
-		normalize_whitespace(&buf);
+		strbuf_normalize_whitespace(&buf);
 
 		return make_code(chunk_buf_detach(&buf));
 	}
@@ -575,24 +461,9 @@ static node_inl *make_str_with_entities(chunk *content)
 	}
 }
 
-// Destructively unescape a string: remove backslashes before punctuation chars.
-extern void unescape_buffer(strbuf *buf)
-{
-	int r, w;
-
-	for (r = 0, w = 0; r < buf->size; ++r) {
-		if (buf->ptr[r] == '\\' && ispunct(buf->ptr[r + 1]))
-			continue;
-
-		buf->ptr[w++] = buf->ptr[r];
-	}
-
-	strbuf_truncate(buf, w);
-}
-
 // Clean a URL: remove surrounding whitespace and surrounding <>,
 // and remove \ that escape punctuation.
-static unsigned char *clean_url(chunk *url)
+unsigned char *clean_url(chunk *url)
 {
 	strbuf buf = GH_BUF_INIT;
 
@@ -607,11 +478,11 @@ static unsigned char *clean_url(chunk *url)
 		houdini_unescape_html_f(&buf, url->data, url->len);
 	}
 
-	unescape_buffer(&buf);
+	strbuf_unescape(&buf);
 	return strbuf_detach(&buf);
 }
 
-static unsigned char *clean_autolink(chunk *url, int is_email)
+unsigned char *clean_autolink(chunk *url, int is_email)
 {
 	strbuf buf = GH_BUF_INIT;
 
@@ -628,7 +499,7 @@ static unsigned char *clean_autolink(chunk *url, int is_email)
 }
 
 // Clean a title: remove surrounding quotes and remove \ that escape punctuation.
-static unsigned char *clean_title(chunk *title)
+unsigned char *clean_title(chunk *title)
 {
 	strbuf buf = GH_BUF_INIT;
 	unsigned char first, last;
@@ -648,7 +519,7 @@ static unsigned char *clean_title(chunk *title)
 		houdini_unescape_html_f(&buf, title->data, title->len);
 	}
 
-	unescape_buffer(&buf);
+	strbuf_unescape(&buf);
 	return strbuf_detach(&buf);
 }
 
@@ -810,7 +681,7 @@ static node_inl* handle_left_bracket(subject* subj)
 			} else {
 				// if we get here, we matched a label but didn't get further:
 				subj->pos = endlabel;
-				lab = parse_chunk_inlines(&rawlabel, subj->reference_map);
+				lab = parse_chunk_inlines(&rawlabel, subj->refmap);
 				result = append_inlines(make_str(chunk_literal("[")),
 						append_inlines(lab,
 							make_str(chunk_literal("]"))));
@@ -834,13 +705,13 @@ static node_inl* handle_left_bracket(subject* subj)
 			}
 
 			// lookup rawlabel in subject->reference_map:
-			ref = lookup_reference(subj->reference_map, &reflabel);
+			ref = reference_lookup(subj->refmap, &reflabel);
 			if (ref != NULL) { // found
 				lab = parse_chunk_inlines(&rawlabel, NULL);
 				result = make_ref_link(lab, ref);
 			} else {
 				subj->pos = endlabel;
-				lab = parse_chunk_inlines(&rawlabel, subj->reference_map);
+				lab = parse_chunk_inlines(&rawlabel, subj->refmap);
 				result = append_inlines(make_str(chunk_literal("[")),
 						append_inlines(lab, make_str(chunk_literal("]"))));
 			}
@@ -887,7 +758,7 @@ extern node_inl* parse_inlines_while(subject* subj, int (*f)(subject*))
 	return result;
 }
 
-node_inl *parse_chunk_inlines(chunk *chunk, reference** refmap)
+node_inl *parse_chunk_inlines(chunk *chunk, reference_map *refmap)
 {
 	subject subj;
 	subject_from_chunk(&subj, chunk, refmap);
@@ -987,7 +858,7 @@ static int parse_inline(subject* subj, node_inl ** last)
 	return 1;
 }
 
-extern node_inl* parse_inlines(strbuf *input, reference** refmap)
+extern node_inl* parse_inlines(strbuf *input, reference_map *refmap)
 {
 	subject subj;
 	subject_from_buf(&subj, input, refmap);
@@ -1009,7 +880,7 @@ void spnl(subject* subj)
 // Modify refmap if a reference is encountered.
 // Return 0 if no reference found, otherwise position of subject
 // after reference is parsed.
-extern int parse_reference(strbuf *input, reference** refmap)
+int parse_reference_inline(strbuf *input, reference_map *refmap)
 {
 	subject subj;
 
@@ -1019,7 +890,6 @@ extern int parse_reference(strbuf *input, reference** refmap)
 
 	int matchlen = 0;
 	int beforetitle;
-	reference *new = NULL;
 
 	subject_from_buf(&subj, input, NULL);
 
@@ -1065,9 +935,7 @@ extern int parse_reference(strbuf *input, reference** refmap)
 		return 0;
 	}
 	// insert reference into refmap
-	new = make_reference(&lab, &url, &title);
-	add_reference(refmap, new);
-
+	reference_create(refmap, &lab, &url, &title);
 	return subj.pos;
 }
 
diff --git a/src/references.c b/src/references.c
new file mode 100644
index 0000000..ff64b00
--- /dev/null
+++ b/src/references.c
@@ -0,0 +1,109 @@
+#include "stmd.h"
+#include "utf8.h"
+#include "references.h"
+
+static unsigned int
+refhash(const unsigned char *link_ref)
+{
+	unsigned int hash = 0;
+
+	while (*link_ref)
+		hash = (*link_ref++) + (hash << 6) + (hash << 16) - hash;
+
+	return hash;
+}
+
+// normalize reference:  collapse internal whitespace to single space,
+// remove leading/trailing whitespace, case fold
+static unsigned char *normalize_reference(chunk *ref)
+{
+	strbuf normalized = GH_BUF_INIT;
+
+	utf8proc_case_fold(&normalized, ref->data, ref->len);
+	strbuf_trim(&normalized);
+	strbuf_normalize_whitespace(&normalized);
+
+	return strbuf_detach(&normalized);
+}
+
+static void add_reference(reference_map *map, reference* ref)
+{
+	ref->next = map->table[ref->hash % REFMAP_SIZE];
+	map->table[ref->hash % REFMAP_SIZE] = ref;
+}
+
+extern reference *reference_create(reference_map *map, chunk *label, chunk *url, chunk *title)
+{
+	reference *ref;
+	ref = malloc(sizeof(reference));
+	ref->label = normalize_reference(label);
+	ref->hash = refhash(ref->label);
+	ref->url = clean_url(url);
+	ref->title = clean_title(title);
+	ref->next = NULL;
+
+	add_reference(map, ref);
+
+	return ref;
+}
+
+// Returns reference if refmap contains a reference with matching
+// label, otherwise NULL.
+reference* reference_lookup(reference_map *map, chunk *label)
+{
+	reference *ref = NULL;
+	unsigned char *norm;
+	unsigned int hash;
+	
+	if (map == NULL)
+		return NULL;
+	
+	norm = normalize_reference(label);
+	hash = refhash(norm);
+	ref = map->table[hash % REFMAP_SIZE];
+
+	while (ref) {
+		if (ref->label[0] == norm[0] &&
+			!strcmp((char *)ref->label, (char *)norm))
+			break;
+		ref = ref->next;
+	}
+
+	free(norm);
+	return ref;
+}
+
+static void reference_free(reference *ref)
+{
+	free(ref->label);
+	free(ref->url);
+	free(ref->title);
+	free(ref);
+}
+
+// Free every reference in `map`, then the map itself; NULL is a no-op.
+void reference_map_free(reference_map *map)
+{
+	unsigned int i;
+
+	if (map == NULL)
+		return;
+
+	for (i = 0; i < REFMAP_SIZE; ++i) {
+		reference *ref = map->table[i];
+		while (ref) {
+			reference *next = ref->next; // save the link before freeing
+			reference_free(ref);
+			ref = next;
+		}
+	}
+	free(map); // `table` is embedded in *map, so it must not be freed on its own
+}
+
+// Allocate an empty, zero-filled reference map; returns NULL if out of memory.
+reference_map *reference_map_new(void)
+{
+	// calloc zeroes every bucket and, unlike malloc+memset, is safe on failure.
+	return calloc(1, sizeof(reference_map));
+}
+
diff --git a/src/references.h b/src/references.h
new file mode 100644
index 0000000..78fffe7
--- /dev/null
+++ b/src/references.h
@@ -0,0 +1,27 @@
+#ifndef _REFERENCES_H_
+#define _REFERENCES_H_
+
+#define REFMAP_SIZE 16
+
+struct reference {
+	struct reference *next;
+	unsigned char *label;
+	unsigned char *url;
+	unsigned char *title;
+	unsigned int hash;
+};
+
+typedef struct reference reference;
+
+struct reference_map {
+	reference *table[REFMAP_SIZE];
+};
+
+typedef struct reference_map reference_map;
+
+reference_map *reference_map_new(void);
+void reference_map_free(reference_map *map);
+reference* reference_lookup(reference_map *map, chunk *label);
+extern reference *reference_create(reference_map *map, chunk *label, chunk *url, chunk *title);
+
+#endif
diff --git a/src/stmd.h b/src/stmd.h
index 21a86b0..4e21e6c 100644
--- a/src/stmd.h
+++ b/src/stmd.h
@@ -5,7 +5,7 @@
 #include <stdio.h>
 #include "buffer.h"
 #include "chunk.h"
-#include "uthash.h"
+#include "references.h"
 
 #define VERSION "0.1"
 #define CODE_INDENT 4
@@ -36,17 +36,7 @@ struct node_inl {
 
 typedef struct node_inl node_inl;
 
-struct reference {
-  unsigned char *label;
-  unsigned char *url;
-  unsigned char *title;
-  UT_hash_handle  hh; // used by uthash
-};
-
-typedef struct reference reference;
-
 // Types for blocks
-
 struct ListData {
 	enum {
 		bullet,
@@ -104,7 +94,7 @@ struct node_block {
 			int level;
 		} header;
 		struct {
-			reference** refmap;
+			reference_map *refmap;
 		} document;
 	} as;
 
@@ -114,14 +104,10 @@ struct node_block {
 
 typedef struct node_block node_block;
 
-node_inl* parse_inlines(strbuf *input, reference** refmap);
+node_inl* parse_inlines(strbuf *input, reference_map *refmap);
 void free_inlines(node_inl* e);
 
-int parse_reference(strbuf *input, reference** refmap);
-void free_reference(reference *ref);
-void free_reference_map(reference **refmap);
-
-void add_reference(reference** refmap, reference* ref);
+int parse_reference_inline(strbuf *input, reference_map *refmap);
 void unescape_buffer(strbuf *buf);
 
 extern node_block* make_document();
@@ -138,4 +124,8 @@ void print_blocks(node_block* blk, int indent);
 void blocks_to_html(strbuf *html, node_block *b, bool tight);
 void inlines_to_html(strbuf *html, node_inl *b);
 
+unsigned char *clean_url(chunk *url);
+unsigned char *clean_autolink(chunk *url, int is_email);
+unsigned char *clean_title(chunk *title);
+
 #endif
diff --git a/src/utf8.c b/src/utf8.c
index 12d7ba5..c65aec6 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -25,7 +25,7 @@ static const int8_t utf8proc_utf8class[256] = {
 
 static void encode_unknown(strbuf *buf)
 {
-	static const unsigned char repl[] = {239, 191, 189};
+	static const uint8_t repl[] = {239, 191, 189};
 	strbuf_put(buf, repl, 3);
 }
 
@@ -52,9 +52,9 @@ ssize_t utf8proc_charlen(const uint8_t *str, ssize_t str_len)
 	return length;
 }
 
-void utf8proc_detab(strbuf *ob, const unsigned char *line, size_t size)
+void utf8proc_detab(strbuf *ob, const uint8_t *line, size_t size)
 {
-	static const unsigned char whitespace[] = "    ";
+	static const uint8_t whitespace[] = "    ";
 
 	size_t i = 0, tab = 0;
 
@@ -132,7 +132,7 @@ ssize_t utf8proc_iterate(const uint8_t *str, ssize_t str_len, int32_t *dst)
 
 void utf8proc_encode_char(int32_t uc, strbuf *buf)
 {
-	unsigned char dst[4];
+	uint8_t dst[4];
 	int len = 0;
 
 	assert(uc >= 0);
@@ -169,7 +169,7 @@ void utf8proc_encode_char(int32_t uc, strbuf *buf)
 	strbuf_put(buf, dst, len);
 }
 
-void utf8proc_case_fold(strbuf *dest, const unsigned char *str, int len)
+void utf8proc_case_fold(strbuf *dest, const uint8_t *str, int len)
 {
 	int32_t c;
 
diff --git a/src/utf8.h b/src/utf8.h
index 1e4e556..9506b75 100644
--- a/src/utf8.h
+++ b/src/utf8.h
@@ -1,12 +1,13 @@
 #ifndef _H_STMD_UTF8_
 #define _H_STMD_UTF8_
 
+#include <stdint.h>
 #include "buffer.h"
 
-void utf8proc_case_fold(strbuf *dest, const unsigned char *str, int len);
+void utf8proc_case_fold(strbuf *dest, const uint8_t *str, int len);
 void utf8proc_encode_char(int32_t uc, strbuf *buf);
 ssize_t utf8proc_iterate(const uint8_t *str, ssize_t str_len, int32_t *dst);
 ssize_t utf8proc_charlen(const uint8_t *str, ssize_t str_len);
-void utf8proc_detab(strbuf *dest, const unsigned char *line, size_t size);
+void utf8proc_detab(strbuf *dest, const uint8_t *line, size_t size);
 
 #endif
-- 
cgit v1.2.3


From 7c2a062cdf9c0514cdf32f4f8bd07cf52d183c8b Mon Sep 17 00:00:00 2001
From: Vicent Marti <tanoku@gmail.com>
Date: Wed, 10 Sep 2014 19:46:34 +0200
Subject: Do not use strchr for span searches

strchr() returns a non-NULL pointer when asked to find '\0' in a string
literal, because the terminating NUL byte counts as part of the string.
---
 src/inlines.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'src/inlines.c')

diff --git a/src/inlines.c b/src/inlines.c
index 3040f09..cd2d124 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -767,10 +767,13 @@ node_inl *parse_chunk_inlines(chunk *chunk, reference_map *refmap)
 
 static int subject_find_special_char(subject *subj)
 {
+	static const char CHARS[] = "\n\\`&_*[]<!";
+	static const size_t CHARS_SIZE = sizeof(CHARS) - 1;
+
 	int n = subj->pos + 1;
 
 	while (n < subj->input.len) {
-		if (strchr("\n\\`&_*[]<!", subj->input.data[n]))
+		if (memchr(CHARS, subj->input.data[n], CHARS_SIZE))
 			return n;
 		n++;
 	}
-- 
cgit v1.2.3


From 118e3d3c39242225baa876319cdbfbb1adadc77b Mon Sep 17 00:00:00 2001
From: Vicent Marti <tanoku@gmail.com>
Date: Mon, 15 Sep 2014 15:28:49 +0200
Subject: Cleanup external APIs

---
 src/blocks.c     |  11 ++--
 src/html/html.c  | 163 ++++++++++++++++++++++++++++---------------------------
 src/inlines.c    |   1 +
 src/main.c       |   8 +--
 src/print.c      | 114 +++++++++++++++++++-------------------
 src/references.c |   1 +
 src/stmd.h       |  26 ++-------
 7 files changed, 159 insertions(+), 165 deletions(-)

(limited to 'src/inlines.c')

diff --git a/src/blocks.c b/src/blocks.c
index 30a8284..2ac7032 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -6,8 +6,9 @@
 
 #include "stmd.h"
 #include "utf8.h"
-#include "html/houdini.h"
 #include "scanners.h"
+#include "inlines.h"
+#include "html/houdini.h"
 
 #define peek_at(i, n) (i)->data[n]
 
@@ -224,7 +225,7 @@ static void finalize(node_block* b, int line_number)
 }
 
 // Add a node_block as child of another.  Return pointer to child.
-extern node_block* add_child(node_block* parent,
+static node_block* add_child(node_block* parent,
 		int block_type, int start_line, int start_column)
 {
 	assert(parent);
@@ -252,7 +253,7 @@ extern node_block* add_child(node_block* parent,
 }
 
 // Free a node_block list and any children.
-extern void free_blocks(node_block* e)
+void stmd_free_nodes(node_block *e)
 {
 	node_block * next;
 	while (e != NULL) {
@@ -264,7 +265,7 @@ extern void free_blocks(node_block* e)
 		} else if (e->tag == BLOCK_DOCUMENT) {
 			reference_map_free(e->as.document.refmap);
 		}
-		free_blocks(e->children);
+		stmd_free_nodes(e->children);
 		free(e);
 		e = next;
 	}
@@ -279,8 +280,6 @@ void process_inlines(node_block* cur, reference_map *refmap)
 		case BLOCK_ATX_HEADER:
 		case BLOCK_SETEXT_HEADER:
 			cur->inline_content = parse_inlines(&cur->string_content, refmap);
-			// MEM
-			// strbuf_free(&cur->string_content);
 			break;
 
 		default:
diff --git a/src/html/html.c b/src/html/html.c
index b48b10b..6f3bc76 100644
--- a/src/html/html.c
+++ b/src/html/html.c
@@ -32,8 +32,89 @@ static inline void cr(strbuf *html)
 		strbuf_putc(html, '\n');
 }
 
+// Convert an inline list to HTML, appending the rendered markup to `html`.
+static void inlines_to_html(strbuf *html, node_inl* ils)
+{
+	strbuf scrap = GH_BUF_INIT;
+
+	while(ils != NULL) {
+		switch(ils->tag) {
+			case INL_STRING:
+				escape_html(html, ils->content.literal.data, ils->content.literal.len);
+				break;
+
+			case INL_LINEBREAK:
+				strbuf_puts(html, "<br />\n");
+				break;
+
+			case INL_SOFTBREAK:
+				strbuf_putc(html, '\n');
+				break;
+
+			case INL_CODE:
+				strbuf_puts(html, "<code>");
+				escape_html(html, ils->content.literal.data, ils->content.literal.len);
+				strbuf_puts(html, "</code>");
+				break;
+
+			case INL_RAW_HTML:
+				strbuf_put(html,
+						ils->content.literal.data,
+						ils->content.literal.len);
+				break;
+
+			case INL_LINK:
+				strbuf_puts(html, "<a href=\"");
+				if (ils->content.linkable.url)
+					escape_href(html, ils->content.linkable.url, -1);
+
+				if (ils->content.linkable.title) {
+					strbuf_puts(html, "\" title=\"");
+					escape_html(html, ils->content.linkable.title, -1);
+				}
+
+				strbuf_puts(html, "\">");
+				inlines_to_html(html, ils->content.inlines);
+				strbuf_puts(html, "</a>");
+				break;
+
+			case INL_IMAGE:
+				strbuf_puts(html, "<img src=\"");
+				if (ils->content.linkable.url)
+					escape_href(html, ils->content.linkable.url, -1);
+
+				inlines_to_html(&scrap, ils->content.inlines);
+				strbuf_puts(html, "\" alt=\"");
+				if (scrap.size)
+					escape_html(html, scrap.ptr, scrap.size);
+				strbuf_clear(&scrap);
+
+				if (ils->content.linkable.title) {
+					strbuf_puts(html, "\" title=\"");
+					escape_html(html, ils->content.linkable.title, -1);
+				}
+
+				strbuf_puts(html, "\"/>");
+				break;
+
+			case INL_STRONG:
+				strbuf_puts(html, "<strong>");
+				inlines_to_html(html, ils->content.inlines);
+				strbuf_puts(html, "</strong>");
+				break;
+
+			case INL_EMPH:
+				strbuf_puts(html, "<em>");
+				inlines_to_html(html, ils->content.inlines);
+				strbuf_puts(html, "</em>");
+				break;
+		}
+		ils = ils->next;
+	}
+}
+
 // Convert a node_block list to HTML.  Returns 0 on success, and sets result.
-void blocks_to_html(strbuf *html, node_block *b, bool tight)
+static void blocks_to_html(strbuf *html, node_block *b, bool tight)
 {
 	struct ListData *data;
 
@@ -139,83 +220,7 @@ void blocks_to_html(strbuf *html, node_block *b, bool tight)
 	}
 }
 
-// Convert an inline list to HTML.  Returns 0 on success, and sets result.
-void inlines_to_html(strbuf *html, node_inl* ils)
+void stmd_render_html(strbuf *html, node_block *root)
 {
-	strbuf scrap = GH_BUF_INIT;
-
-	while(ils != NULL) {
-		switch(ils->tag) {
-			case INL_STRING:
-				escape_html(html, ils->content.literal.data, ils->content.literal.len);
-				break;
-
-			case INL_LINEBREAK:
-				strbuf_puts(html, "<br />\n");
-				break;
-
-			case INL_SOFTBREAK:
-				strbuf_putc(html, '\n');
-				break;
-
-			case INL_CODE:
-				strbuf_puts(html, "<code>");
-				escape_html(html, ils->content.literal.data, ils->content.literal.len);
-				strbuf_puts(html, "</code>");
-				break;
-
-			case INL_RAW_HTML:
-				strbuf_put(html,
-						ils->content.literal.data,
-						ils->content.literal.len);
-				break;
-
-			case INL_LINK:
-				strbuf_puts(html, "<a href=\"");
-				if (ils->content.linkable.url)
-					escape_href(html, ils->content.linkable.url, -1);
-
-				if (ils->content.linkable.title) {
-					strbuf_puts(html, "\" title=\"");
-					escape_html(html, ils->content.linkable.title, -1);
-				}
-
-				strbuf_puts(html, "\">");
-				inlines_to_html(html, ils->content.inlines);
-				strbuf_puts(html, "</a>");
-				break;
-
-			case INL_IMAGE:
-				strbuf_puts(html, "<img src=\"");
-				if (ils->content.linkable.url)
-					escape_href(html, ils->content.linkable.url, -1);
-
-				inlines_to_html(&scrap, ils->content.inlines);
-				strbuf_puts(html, "\" alt=\"");
-				if (scrap.size)
-					escape_html(html, scrap.ptr, scrap.size);
-				strbuf_clear(&scrap);
-
-				if (ils->content.linkable.title) {
-					strbuf_puts(html, "\" title=\"");
-					escape_html(html, ils->content.linkable.title, -1);
-				}
-
-				strbuf_puts(html, "\"/>");
-				break;
-
-			case INL_STRONG:
-				strbuf_puts(html, "<strong>");
-				inlines_to_html(html, ils->content.inlines);
-				strbuf_puts(html, "</strong>");
-				break;
-
-			case INL_EMPH:
-				strbuf_puts(html, "<em>");
-				inlines_to_html(html, ils->content.inlines);
-				strbuf_puts(html, "</em>");
-				break;
-		}
-		ils = ils->next;
-	}
+	blocks_to_html(html, root, false);
 }
diff --git a/src/inlines.c b/src/inlines.c
index cd2d124..145825c 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -8,6 +8,7 @@
 #include "html/houdini.h"
 #include "utf8.h"
 #include "scanners.h"
+#include "inlines.h"
 
 typedef struct Subject {
 	chunk input;
diff --git a/src/main.c b/src/main.c
index 90bb16d..76a0e12 100644
--- a/src/main.c
+++ b/src/main.c
@@ -17,9 +17,9 @@ static void print_document(node_block *document, bool ast)
 	strbuf html = GH_BUF_INIT;
 
 	if (ast) {
-		print_blocks(document, 0);
+		stmd_debug_print(document);
 	} else {
-		blocks_to_html(&html, document, false);
+		stmd_render_html(&html, document);
 		printf("%s", html.ptr);
 		strbuf_free(&html);
 	}
@@ -54,7 +54,7 @@ int main(int argc, char *argv[])
 	if (numfps == 0) {
 		document = stmd_parse_file(stdin);
 		print_document(document, ast);
-		free_blocks(document);
+		stmd_free_nodes(document);
 	} else {
 		for (i = 0; i < numfps; i++) {
 			FILE *fp = fopen(argv[files[i]], "r");
@@ -67,7 +67,7 @@ int main(int argc, char *argv[])
 
 			document = stmd_parse_file(fp);
 			print_document(document, ast);
-			free_blocks(document);
+			stmd_free_nodes(document);
 			fclose(fp);
 		}
 	}
diff --git a/src/print.c b/src/print.c
index 36140a8..83f8daa 100644
--- a/src/print.c
+++ b/src/print.c
@@ -32,14 +32,69 @@ static void print_str(const unsigned char *s, int len)
 	putchar('"');
 }
 
+// Prettyprint an inline list, for debugging.
+static void print_inlines(node_inl* ils, int indent)
+{
+	while(ils != NULL) {
+		for (int i=0; i < indent; i++) {
+			putchar(' ');
+		}
+		switch(ils->tag) {
+		case INL_STRING:
+			printf("str ");
+			print_str(ils->content.literal.data, ils->content.literal.len);
+			putchar('\n');
+			break;
+		case INL_LINEBREAK:
+			printf("linebreak\n");
+			break;
+		case INL_SOFTBREAK:
+			printf("softbreak\n");
+			break;
+		case INL_CODE:
+			printf("code ");
+			print_str(ils->content.literal.data, ils->content.literal.len);
+			putchar('\n');
+			break;
+		case INL_RAW_HTML:
+			printf("html ");
+			print_str(ils->content.literal.data, ils->content.literal.len);
+			putchar('\n');
+			break;
+		case INL_LINK:
+		case INL_IMAGE:
+			printf("%s url=", ils->tag == INL_LINK ? "link" : "image");
+
+			if (ils->content.linkable.url)
+				print_str(ils->content.linkable.url, -1);
+
+			if (ils->content.linkable.title) {
+				printf(" title=");
+				print_str(ils->content.linkable.title, -1);
+			}
+			putchar('\n');
+			print_inlines(ils->content.linkable.label, indent + 2);
+			break;
+		case INL_STRONG:
+			printf("strong\n");
+			print_inlines(ils->content.linkable.label, indent + 2);
+			break;
+		case INL_EMPH:
+			printf("emph\n");
+			print_inlines(ils->content.linkable.label, indent + 2);
+			break;
+		}
+		ils = ils->next;
+	}
+}
+
 // Functions to pretty-print inline and node_block lists, for debugging.
 // Prettyprint an inline list, for debugging.
-extern void print_blocks(node_block* b, int indent)
+static void print_blocks(node_block* b, int indent)
 {
 	struct ListData *data;
 
 	while(b != NULL) {
-		// printf("%3d %3d %3d| ", b->start_line, b->start_column, b->end_line);
 		for (int i=0; i < indent; i++) {
 			putchar(' ');
 		}
@@ -115,58 +170,7 @@ extern void print_blocks(node_block* b, int indent)
 	}
 }
 
-// Prettyprint an inline list, for debugging.
-extern void print_inlines(node_inl* ils, int indent)
+void stmd_debug_print(node_block *root)
 {
-	while(ils != NULL) {
-		for (int i=0; i < indent; i++) {
-			putchar(' ');
-		}
-		switch(ils->tag) {
-		case INL_STRING:
-			printf("str ");
-			print_str(ils->content.literal.data, ils->content.literal.len);
-			putchar('\n');
-			break;
-		case INL_LINEBREAK:
-			printf("linebreak\n");
-			break;
-		case INL_SOFTBREAK:
-			printf("softbreak\n");
-			break;
-		case INL_CODE:
-			printf("code ");
-			print_str(ils->content.literal.data, ils->content.literal.len);
-			putchar('\n');
-			break;
-		case INL_RAW_HTML:
-			printf("html ");
-			print_str(ils->content.literal.data, ils->content.literal.len);
-			putchar('\n');
-			break;
-		case INL_LINK:
-		case INL_IMAGE:
-			printf("%s url=", ils->tag == INL_LINK ? "link" : "image");
-
-			if (ils->content.linkable.url)
-				print_str(ils->content.linkable.url, -1);
-
-			if (ils->content.linkable.title) {
-				printf(" title=");
-				print_str(ils->content.linkable.title, -1);
-			}
-			putchar('\n');
-			print_inlines(ils->content.linkable.label, indent + 2);
-			break;
-		case INL_STRONG:
-			printf("strong\n");
-			print_inlines(ils->content.linkable.label, indent + 2);
-			break;
-		case INL_EMPH:
-			printf("emph\n");
-			print_inlines(ils->content.linkable.label, indent + 2);
-			break;
-		}
-		ils = ils->next;
-	}
+	print_blocks(root, 0);
 }
diff --git a/src/references.c b/src/references.c
index 300bbcc..3e54b48 100644
--- a/src/references.c
+++ b/src/references.c
@@ -1,6 +1,7 @@
 #include "stmd.h"
 #include "utf8.h"
 #include "references.h"
+#include "inlines.h"
 
 static unsigned int
 refhash(const unsigned char *link_ref)
diff --git a/src/stmd.h b/src/stmd.h
index 4e21e6c..c6473a6 100644
--- a/src/stmd.h
+++ b/src/stmd.h
@@ -104,28 +104,12 @@ struct node_block {
 
 typedef struct node_block node_block;
 
-node_inl* parse_inlines(strbuf *input, reference_map *refmap);
-void free_inlines(node_inl* e);
+node_block *stmd_parse_document(const unsigned char *buffer, size_t len);
+node_block *stmd_parse_file(FILE *f);
 
-int parse_reference_inline(strbuf *input, reference_map *refmap);
-void unescape_buffer(strbuf *buf);
+void stmd_free_nodes(node_block *e);
 
-extern node_block* make_document();
-extern node_block* add_child(node_block* parent,
-                        int block_type, int start_line, int start_column);
-void free_blocks(node_block* e);
-
-extern node_block *stmd_parse_document(const unsigned char *buffer, size_t len);
-extern node_block *stmd_parse_file(FILE *f);
-
-void print_inlines(node_inl* ils, int indent);
-void print_blocks(node_block* blk, int indent);
-
-void blocks_to_html(strbuf *html, node_block *b, bool tight);
-void inlines_to_html(strbuf *html, node_inl *b);
-
-unsigned char *clean_url(chunk *url);
-unsigned char *clean_autolink(chunk *url, int is_email);
-unsigned char *clean_title(chunk *title);
+void stmd_debug_print(node_block *root);
+void stmd_render_html(strbuf *html, node_block *root);
 
 #endif
-- 
cgit v1.2.3