From e216094e2192c05ddbd0988458eb8c0012e7baf8 Mon Sep 17 00:00:00 2001
From: Vicent Marti
Date: Tue, 2 Sep 2014 01:10:54 +0200
Subject: lol
---
src/inlines.c | 1711 ++++++++++++++++++++++++++++++---------------------------
1 file changed, 908 insertions(+), 803 deletions(-)
(limited to 'src/inlines.c')
diff --git a/src/inlines.c b/src/inlines.c
index f75c846..4ff45ad 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -2,133 +2,154 @@
#include
#include
#include
-#include "bstrlib.h"
+#include
+
#include "stmd.h"
#include "uthash.h"
#include "debug.h"
#include "scanners.h"
#include "utf8.h"
+typedef struct Subject {
+ const gh_buf *buffer;
+ int pos;
+ reference** reference_map;
+ int label_nestlevel;
+} subject;
+
+reference* lookup_reference(reference** refmap, chunk *label);
+reference* make_reference(chunk *label, chunk *url, chunk *title);
+
+static unsigned char *clean_url(chunk *url);
+static unsigned char *clean_title(chunk *title);
+
+inline static unsigned char *chunk_to_cstr(chunk *c);
+inline static void chunk_free(chunk *c);
+inline static void chunk_trim(chunk *c);
+
+inline static chunk chunk_literal(const char *data);
+inline static chunk chunk_buf_detach(gh_buf *buf);
+inline static chunk chunk_buf(const gh_buf *buf, int pos, int len);
+
+static inl *parse_chunk_inlines(chunk *chunk, reference** refmap);
+static inl *parse_inlines_while(subject* subj, int (*f)(subject*));
+static int parse_inline(subject* subj, inl ** last);
+
extern void free_reference(reference *ref) {
- bdestroy(ref->label);
- bdestroy(ref->url);
- bdestroy(ref->title);
- free(ref);
+ free(ref->label);
+ free(ref->url);
+ free(ref->title);
+ free(ref);
}
extern void free_reference_map(reference **refmap) {
- /* free the hash table contents */
- reference *s;
- reference *tmp;
- if (refmap != NULL) {
- HASH_ITER(hh, *refmap, s, tmp) {
- HASH_DEL(*refmap, s);
- free_reference(s);
- }
- free(refmap);
- }
+ /* free the hash table contents */
+ reference *s;
+ reference *tmp;
+ if (refmap != NULL) {
+ HASH_ITER(hh, *refmap, s, tmp) {
+ HASH_DEL(*refmap, s);
+ free_reference(s);
+ }
+ free(refmap);
+ }
}
// normalize reference: collapse internal whitespace to single space,
// remove leading/trailing whitespace, case fold
-static bstring normalize_reference(bstring s)
-{
- bstring normalized = case_fold(s);
- int pos = 0;
- int startpos;
- char c;
- while ((c = bchar(normalized, pos))) {
- if (isspace(c)) {
- startpos = pos;
- // skip til next non-space
- pos++;
- while (isspace(bchar(s, pos))) {
- pos++;
- }
- bdelete(normalized, startpos, pos - startpos);
- binsertch(normalized, startpos, 1, ' ');
- pos = startpos + 1;
- }
- pos++;
- }
- btrimws(normalized);
- return normalized;
+static unsigned char *normalize_reference(chunk *ref)
+{
+ gh_buf normalized = GH_BUF_INIT;
+ int r, w;
+
+ utf8proc_case_fold(&normalized, ref->data, ref->len);
+ gh_buf_trim(&normalized);
+
+ // compact in place (r reads, w writes): each whitespace run becomes one ' '
+ for (r = 0, w = 0; r < normalized.size; ++r) {
+ unsigned char ch = normalized.ptr[r];
+ if (isspace(ch) && w && normalized.ptr[w - 1] == ' ')
+ continue;
+ normalized.ptr[w++] = isspace(ch) ? ' ' : ch;
+ }
+ gh_buf_truncate(&normalized, w); // drop the stale tail left by compaction
+
+ return gh_buf_detach(&normalized);
}
// Returns reference if refmap contains a reference with matching
// label, otherwise NULL.
-extern reference* lookup_reference(reference** refmap, bstring lab)
+extern reference* lookup_reference(reference** refmap, chunk *label)
{
- reference * ref = NULL;
- bstring label = normalize_reference(lab);
- if (refmap != NULL) {
- HASH_FIND_STR(*refmap, (char*) label->data, ref);
- }
- bdestroy(label);
- return ref;
+ reference *ref = NULL;
+ unsigned char *norm = normalize_reference(label);
+ if (refmap != NULL && norm != NULL) {
+ HASH_FIND_STR(*refmap, (char*)norm, ref);
+ }
+ free(norm); // release the temporary key; `label` still belongs to the caller
+ return ref;
}
-extern reference* make_reference(bstring label, bstring url, bstring title)
+extern reference* make_reference(chunk *label, chunk *url, chunk *title)
{
- reference * ref;
- ref = malloc(sizeof(reference));
- ref->label = normalize_reference(label);
- ref->url = bstrcpy(url);
- ref->title = bstrcpy(title);
- return ref;
+ reference *ref;
+ ref = malloc(sizeof(reference));
+ ref->label = normalize_reference(label);
+ ref->url = clean_url(url);
+ ref->title = clean_title(title);
+ return ref;
}
extern void add_reference(reference** refmap, reference* ref)
{
- reference * t = NULL;
- HASH_FIND(hh, *refmap, (char*) ref->label->data,
- (unsigned) blength(ref->label), t);
- if (t == NULL) {
- HASH_ADD_KEYPTR(hh, *refmap, (char*) ref->label->data,
- (unsigned) blength(ref->label), ref);
- } else {
- free_reference(ref); // we free this now since it won't be in the refmap
- }
+ reference * t = NULL;
+ unsigned keylen = (unsigned)strlen((char*)ref->label); // label is the NUL-terminated key
+ HASH_FIND(hh, *refmap, (char*)ref->label, keylen, t);
+ if (t == NULL) {
+ HASH_ADD_KEYPTR(hh, *refmap, (char*)ref->label, keylen, ref);
+ } else {
+ free_reference(ref); // we free this now since it won't be in the refmap
+ }
}
// Create an inline with a linkable string value.
-inline static inl* make_linkable(int t, inl* label, bstring url, bstring title)
+inline static inl* make_linkable(int t, inl* label, chunk url, chunk title)
{
- inl* e = (inl*) malloc(sizeof(inl));
- e->tag = t;
- e->content.linkable.label = label;
- e->content.linkable.url = url;
- e->content.linkable.title = title;
- e->next = NULL;
- return e;
+ inl* e = (inl*) malloc(sizeof(inl));
+ e->tag = t;
+ e->content.linkable.label = label;
+ e->content.linkable.url = chunk_to_cstr(&url);
+ e->content.linkable.title = chunk_to_cstr(&title);
+ e->next = NULL;
+ return e;
}
inline static inl* make_inlines(int t, inl* contents)
{
- inl* e = (inl*) malloc(sizeof(inl));
- e->tag = t;
- e->content.inlines = contents;
- e->next = NULL;
- return e;
+ inl* e = (inl*) malloc(sizeof(inl));
+ e->tag = t;
+ e->content.inlines = contents;
+ e->next = NULL;
+ return e;
}
// Create an inline with a literal string value.
-inline static inl* make_literal(int t, bstring s)
+inline static inl* make_literal(int t, chunk s)
{
- inl* e = (inl*) malloc(sizeof(inl));
- e->tag = t;
- e->content.literal = s;
- e->next = NULL;
- return e;
+ inl* e = (inl*) malloc(sizeof(inl));
+ e->tag = t;
+ e->content.literal = s;
+ e->next = NULL;
+ return e;
}
// Create an inline with no value.
inline static inl* make_simple(int t)
{
- inl* e = (inl*) malloc(sizeof(inl));
- e->tag = t;
- e->next = NULL;
- return e;
+ inl* e = (inl*) malloc(sizeof(inl));
+ e->tag = t;
+ e->next = NULL;
+ return e;
}
// Macros for creating various kinds of inlines.
@@ -139,113 +160,157 @@ inline static inl* make_simple(int t)
#define make_linebreak() make_simple(linebreak)
#define make_softbreak() make_simple(softbreak)
#define make_link(label, url, title) make_linkable(link, label, url, title)
-#define make_image(alt, url, title) make_linkable(image, alt, url, title)
#define make_emph(contents) make_inlines(emph, contents)
#define make_strong(contents) make_inlines(strong, contents)
// Free an inline list.
extern void free_inlines(inl* e)
{
- inl * next;
- while (e != NULL) {
- switch (e->tag){
- case str:
- case raw_html:
- case code:
- case entity:
- bdestroy(e->content.literal);
- break;
- case linebreak:
- case softbreak:
- break;
- case link:
- case image:
- bdestroy(e->content.linkable.url);
- bdestroy(e->content.linkable.title);
- free_inlines(e->content.linkable.label);
- break;
- case emph:
- case strong:
- free_inlines(e->content.inlines);
- break;
- default:
- break;
- }
- next = e->next;
- free(e);
- e = next;
- }
+ inl * next;
+ while (e != NULL) {
+ switch (e->tag){
+ case str:
+ case raw_html:
+ case code:
+ case entity:
+ chunk_free(&e->content.literal);
+ break;
+ case linebreak:
+ case softbreak:
+ break;
+ case link:
+ case image:
+ free(e->content.linkable.url);
+ free(e->content.linkable.title);
+ free_inlines(e->content.linkable.label);
+ break;
+ case emph:
+ case strong:
+ free_inlines(e->content.inlines);
+ break;
+ default:
+ break;
+ }
+ next = e->next;
+ free(e);
+ e = next;
+ }
}
// Append inline list b to the end of inline list a.
// Return pointer to head of new list.
inline static inl* append_inlines(inl* a, inl* b)
{
- if (a == NULL) { // NULL acts like an empty list
- return b;
- }
- inl* cur = a;
- while (cur->next) {
- cur = cur->next;
- }
- cur->next = b;
- return a;
+ if (a == NULL) { // NULL acts like an empty list
+ return b;
+ }
+ inl* cur = a;
+ while (cur->next) {
+ cur = cur->next;
+ }
+ cur->next = b;
+ return a;
}
// Make a 'subject' from an input string.
-static subject* make_subject(bstring s, reference** refmap)
+static void init_subject(subject *e, gh_buf *buffer, int input_pos, reference** refmap)
{
- subject* e = (subject*) malloc(sizeof(subject));
- // remove final whitespace
- brtrimws(s);
- e->buffer = s;
- e->pos = 0;
- e->label_nestlevel = 0;
- e->reference_map = refmap;
- return e;
+ e->buffer = buffer;
+ e->pos = input_pos;
+ e->label_nestlevel = 0;
+ e->reference_map = refmap;
}
inline static int isbacktick(int c)
{
- return (c == '`');
+ return (c == '`');
+}
+
+inline static void chunk_free(chunk *c)
+{
+ if (c->alloc)
+ free((char *)c->data);
+
+ c->data = NULL;
+ c->alloc = 0;
+ c->len = 0;
+}
+
+inline static void chunk_trim(chunk *c)
+{
+ while (c->len && isspace(c->data[0])) {
+ c->data++;
+ c->len--;
+ }
+
+ while (c->len > 0) {
+ if (!isspace(c->data[c->len - 1]))
+ break;
+
+ c->len--;
+ }
+}
+
+inline static unsigned char *chunk_to_cstr(chunk *c)
+{
+ unsigned char *str;
+
+ str = malloc(c->len + 1);
+ memcpy(str, c->data, c->len);
+ str[c->len] = 0;
+
+ return str;
+}
+
+inline static chunk chunk_literal(const char *data)
+{
+ chunk c = {data, strlen(data), 0};
+ return c;
+}
+
+inline static chunk chunk_buf(const gh_buf *buf, int pos, int len)
+{
+ chunk c = {buf->ptr + pos, len, 0};
+ return c;
+}
+
+inline static chunk chunk_buf_detach(gh_buf *buf)
+{
+ chunk c;
+
+ c.len = buf->size;
+ c.data = gh_buf_detach(buf);
+ c.alloc = 1;
+
+ return c;
}
// Return the next character in the subject, without advancing.
// Return 0 if at the end of the subject.
-#define peek_char(subj) bchar(subj->buffer, subj->pos)
+#define peek_char(subj) gh_buf_at((subj)->buffer, (subj)->pos)
// Return true if there are more characters in the subject.
inline static int is_eof(subject* subj)
{
- return (subj->pos >= blength(subj->buffer));
+ return (subj->pos >= gh_buf_len(subj->buffer));
}
// Advance the subject. Doesn't check for eof.
-#define advance(subj) subj->pos += 1
+#define advance(subj) (subj)->pos += 1
// Take characters while a predicate holds, and return a string.
-inline static bstring take_while(subject* subj, int (*f)(int))
+inline static chunk take_while(subject* subj, int (*f)(int))
{
- unsigned char c;
- int startpos = subj->pos;
- int len = 0;
- while ((c = peek_char(subj)) && (*f)(c)) {
- advance(subj);
- len++;
- }
- return bmidstr(subj->buffer, startpos, len);
-}
+ unsigned char c;
+ int startpos = subj->pos;
+ int len = 0;
-// Take one character and return a string, or NULL if eof.
-inline static bstring take_one(subject* subj)
-{
- int startpos = subj->pos;
- if (is_eof(subj)){
- return NULL;
- } else {
- advance(subj);
- return bmidstr(subj->buffer, startpos, 1);
- }
+ while ((c = peek_char(subj)) && (*f)(c)) {
+ advance(subj);
+ len++;
+ }
+
+ return chunk_buf(subj->buffer, startpos, len);
}
// Try to process a backtick code span that began with a
@@ -255,381 +320,406 @@ inline static bstring take_one(subject* subj)
// after the closing backticks.
static int scan_to_closing_backticks(subject* subj, int openticklength)
{
- // read non backticks
- char c;
- while ((c = peek_char(subj)) && c != '`') {
- advance(subj);
- }
- if (is_eof(subj)) {
- return 0; // did not find closing ticks, return 0
- }
- int numticks = 0;
- while (peek_char(subj) == '`') {
- advance(subj);
- numticks++;
- }
- if (numticks != openticklength){
- return(scan_to_closing_backticks(subj, openticklength));
- }
- return (subj->pos);
-}
-
-// Destructively modify bstring, collapsing consecutive
+ // read non backticks
+ char c;
+ while ((c = peek_char(subj)) && c != '`') {
+ advance(subj);
+ }
+ if (is_eof(subj)) {
+ return 0; // did not find closing ticks, return 0
+ }
+ int numticks = 0;
+ while (peek_char(subj) == '`') {
+ advance(subj);
+ numticks++;
+ }
+ if (numticks != openticklength){
+ return(scan_to_closing_backticks(subj, openticklength));
+ }
+ return (subj->pos);
+}
+
+// Destructively modify string, collapsing consecutive
// space and newline characters into a single space.
-static int normalize_whitespace(bstring s)
-{
- bool last_char_was_space = false;
- int pos = 0;
- char c;
- while ((c = bchar(s, pos))) {
- switch (c) {
- case ' ':
- if (last_char_was_space) {
- bdelete(s, pos, 1);
- } else {
- pos++;
- }
- last_char_was_space = true;
- break;
- case '\n':
- if (last_char_was_space) {
- bdelete(s, pos, 1);
- } else {
- bdelete(s, pos, 1);
- binsertch(s, pos, 1, ' ');
- pos++;
- }
- last_char_was_space = true;
- break;
- default:
- pos++;
- last_char_was_space = false;
- }
- }
- return 0;
+static void normalize_whitespace(gh_buf *s)
+{
+ // Compacts the buffer in place with a read cursor (r) and a write
+ // cursor (w). The output is never longer than the input, so w never
+ // overtakes r and no scratch buffer is needed.
+ //
+ // Only ' ' and '\n' are treated as whitespace, as in the bstring
+ // implementation this function replaces.
+
+ bool last_char_was_space = false;
+ int r, w;
+
+ for (r = 0, w = 0; r < s->size; ++r) {
+ switch (s->ptr[r]) {
+ case ' ':
+ case '\n':
+ // Emit a single space for the first character of a run and
+ // drop the rest of the run.
+ if (!last_char_was_space) {
+ s->ptr[w++] = ' ';
+ last_char_was_space = true;
+ }
+ break;
+ default:
+ s->ptr[w++] = s->ptr[r];
+ last_char_was_space = false;
+ break;
+ }
+ }
+
+ // Shrink to the compacted length; bytes past w are leftovers of the
+ // original text.
+ gh_buf_truncate(s, w);
}
// Parse backtick code section or raw backticks, return an inline.
// Assumes that the subject has a backtick at the current position.
static inl* handle_backticks(subject *subj)
{
- bstring openticks = take_while(subj, isbacktick);
- bstring result;
- int ticklength = blength(openticks);
- int startpos = subj->pos;
- int endpos = scan_to_closing_backticks(subj, ticklength);
- if (endpos == 0) { // not found
- subj->pos = startpos; // rewind
- return make_str(openticks);
- } else {
- bdestroy(openticks);
- result = bmidstr(subj->buffer, startpos, endpos - startpos - ticklength);
- btrimws(result);
- normalize_whitespace(result);
- return make_code(result);
- }
+ chunk openticks = take_while(subj, isbacktick);
+ int startpos = subj->pos;
+ int endpos = scan_to_closing_backticks(subj, openticks.len);
+
+ if (endpos == 0) { // not found
+ subj->pos = startpos; // rewind
+ return make_str(openticks);
+ } else {
+ gh_buf buf = GH_BUF_INIT;
+
+ gh_buf_set(&buf, subj->buffer->ptr + startpos, endpos - startpos - openticks.len);
+ gh_buf_trim(&buf);
+ normalize_whitespace(&buf);
+
+ return make_code(chunk_buf_detach(&buf));
+ }
}
// Scan ***, **, or * and return number scanned, or 0.
// Don't advance position.
static int scan_delims(subject* subj, char c, bool * can_open, bool * can_close)
{
- int numdelims = 0;
- char char_before, char_after;
- int startpos = subj->pos;
-
- char_before = subj->pos == 0 ? '\n' : bchar(subj->buffer, subj->pos - 1);
- while (peek_char(subj) == c) {
- numdelims++;
- advance(subj);
- }
- char_after = peek_char(subj);
- *can_open = numdelims > 0 && numdelims <= 3 && !isspace(char_after);
- *can_close = numdelims > 0 && numdelims <= 3 && !isspace(char_before);
- if (c == '_') {
- *can_open = *can_open && !isalnum(char_before);
- *can_close = *can_close && !isalnum(char_after);
- }
- subj->pos = startpos;
- return numdelims;
+ int numdelims = 0;
+ char char_before, char_after;
+ int startpos = subj->pos;
+
+ char_before = subj->pos == 0 ? '\n' : gh_buf_at(subj->buffer, subj->pos - 1);
+ while (peek_char(subj) == c) {
+ numdelims++;
+ advance(subj);
+ }
+ char_after = peek_char(subj);
+ *can_open = numdelims > 0 && numdelims <= 3 && !isspace(char_after);
+ *can_close = numdelims > 0 && numdelims <= 3 && !isspace(char_before);
+ if (c == '_') {
+ *can_open = *can_open && !isalnum(char_before);
+ *can_close = *can_close && !isalnum(char_after);
+ }
+ subj->pos = startpos;
+ return numdelims;
}
// Parse strong/emph or a fallback.
// Assumes the subject has '_' or '*' at the current position.
static inl* handle_strong_emph(subject* subj, char c)
{
- bool can_open, can_close;
- inl * result = NULL;
- inl ** last = malloc(sizeof(inl *));
- inl * new;
- inl * il;
- inl * first_head = NULL;
- inl * first_close = NULL;
- int first_close_delims = 0;
- int numdelims;
-
- *last = NULL;
-
- numdelims = scan_delims(subj, c, &can_open, &can_close);
- subj->pos += numdelims;
-
- new = make_str(bmidstr(subj->buffer, subj->pos - numdelims, numdelims));
- *last = new;
- first_head = new;
- result = new;
-
- if (!can_open || numdelims == 0) {
- goto done;
- }
-
- switch (numdelims) {
- case 1:
- while (true) {
- numdelims = scan_delims(subj, c, &can_open, &can_close);
- if (numdelims >= 1 && can_close) {
- subj->pos += 1;
- first_head->tag = emph;
- bdestroy(first_head->content.literal);
- first_head->content.inlines = first_head->next;
- first_head->next = NULL;
- goto done;
- } else {
- if (!parse_inline(subj, last)) {
- goto done;
- }
- }
- }
- break;
- case 2:
- while (true) {
- numdelims = scan_delims(subj, c, &can_open, &can_close);
- if (numdelims >= 2 && can_close) {
- subj->pos += 2;
- first_head->tag = strong;
- bdestroy(first_head->content.literal);
- first_head->content.inlines = first_head->next;
- first_head->next = NULL;
- goto done;
- } else {
- if (!parse_inline(subj, last)) {
- goto done;
- }
- }
- }
- break;
- case 3:
- while (true) {
- numdelims = scan_delims(subj, c, &can_open, &can_close);
- if (can_close && numdelims >= 1 && numdelims <= 3 &&
- numdelims != first_close_delims) {
- new = make_str(bmidstr(subj->buffer, subj->pos, numdelims));
- append_inlines(*last, new);
- *last = new;
-
- if (first_close_delims == 1 && numdelims > 2) {
- numdelims = 2;
- } else if (first_close_delims == 2) {
- numdelims = 1;
- } else if (numdelims == 3) {
- // If we opened with ***, we interpret it as ** followed by *
- // giving us
- numdelims = 1;
- }
-
- subj->pos += numdelims;
- if (first_close) {
- first_head->tag = first_close_delims == 1 ? strong : emph;
- bdestroy(first_head->content.literal);
- first_head->content.inlines =
- make_inlines(first_close_delims == 1 ? emph : strong,
- first_head->next);
-
- il = first_head->next;
- while (il->next && il->next != first_close) {
- il = il->next;
- }
- il->next = NULL;
-
- first_head->content.inlines->next = first_close->next;
-
- il = first_head->content.inlines;
- while (il->next && il->next != *last) {
- il = il->next;
- }
- il->next = NULL;
- free_inlines(*last);
-
- first_close->next = NULL;
- free_inlines(first_close);
- first_head->next = NULL;
- goto done;
- } else {
- first_close = *last;
- first_close_delims = numdelims;
- }
- } else {
- if (!parse_inline(subj, last)) {
- goto done;
- }
- }
- }
- break;
- default:
- goto done;
- }
-
- done:
- free(last);
- return result;
+ bool can_open, can_close;
+ inl * result = NULL;
+ inl ** last = malloc(sizeof(inl *));
+ inl * new;
+ inl * il;
+ inl * first_head = NULL;
+ inl * first_close = NULL;
+ int first_close_delims = 0;
+ int numdelims;
+
+ *last = NULL;
+
+ numdelims = scan_delims(subj, c, &can_open, &can_close);
+ subj->pos += numdelims;
+
+ new = make_str(chunk_buf(subj->buffer, subj->pos - numdelims, numdelims));
+ *last = new;
+ first_head = new;
+ result = new;
+
+ if (!can_open || numdelims == 0) {
+ goto done;
+ }
+
+ switch (numdelims) {
+ case 1:
+ while (true) {
+ numdelims = scan_delims(subj, c, &can_open, &can_close);
+ if (numdelims >= 1 && can_close) {
+ subj->pos += 1;
+ first_head->tag = emph;
+ chunk_free(&first_head->content.literal);
+ first_head->content.inlines = first_head->next;
+ first_head->next = NULL;
+ goto done;
+ } else {
+ if (!parse_inline(subj, last)) {
+ goto done;
+ }
+ }
+ }
+ break;
+ case 2:
+ while (true) {
+ numdelims = scan_delims(subj, c, &can_open, &can_close);
+ if (numdelims >= 2 && can_close) {
+ subj->pos += 2;
+ first_head->tag = strong;
+ chunk_free(&first_head->content.literal);
+ first_head->content.inlines = first_head->next;
+ first_head->next = NULL;
+ goto done;
+ } else {
+ if (!parse_inline(subj, last)) {
+ goto done;
+ }
+ }
+ }
+ break;
+ case 3:
+ while (true) {
+ numdelims = scan_delims(subj, c, &can_open, &can_close);
+ if (can_close && numdelims >= 1 && numdelims <= 3 &&
+ numdelims != first_close_delims) {
+ new = make_str(chunk_buf(subj->buffer, subj->pos, numdelims));
+ append_inlines(*last, new);
+ *last = new;
+ if (first_close_delims == 1 && numdelims > 2) {
+ numdelims = 2;
+ } else if (first_close_delims == 2) {
+ numdelims = 1;
+ } else if (numdelims == 3) {
+ // If we opened with ***, we interpret it as ** followed by *
+ // giving us <strong><em>
+ numdelims = 1;
+ }
+ subj->pos += numdelims;
+ if (first_close) {
+ first_head->tag = first_close_delims == 1 ? strong : emph;
+ chunk_free(&first_head->content.literal);
+ first_head->content.inlines =
+ make_inlines(first_close_delims == 1 ? emph : strong,
+ first_head->next);
+
+ il = first_head->next;
+ while (il->next && il->next != first_close) {
+ il = il->next;
+ }
+ il->next = NULL;
+
+ first_head->content.inlines->next = first_close->next;
+
+ il = first_head->content.inlines;
+ while (il->next && il->next != *last) {
+ il = il->next;
+ }
+ il->next = NULL;
+ free_inlines(*last);
+
+ first_close->next = NULL;
+ free_inlines(first_close);
+ first_head->next = NULL;
+ goto done;
+ } else {
+ first_close = *last;
+ first_close_delims = numdelims;
+ }
+ } else {
+ if (!parse_inline(subj, last)) {
+ goto done;
+ }
+ }
+ }
+ break;
+ default:
+ goto done;
+ }
+
+done:
+ free(last);
+ return result;
}
// Parse backslash-escape or just a backslash, returning an inline.
static inl* handle_backslash(subject *subj)
{
- advance(subj);
- unsigned char nextchar = peek_char(subj);
- if (ispunct(nextchar)) { // only ascii symbols and newline can be escaped
- advance(subj);
- return make_str(bformat("%c", nextchar));
- } else if (nextchar == '\n') {
- advance(subj);
- return make_linebreak();
- } else {
- return make_str(bfromcstr("\\"));
- }
+ advance(subj);
+ unsigned char nextchar = peek_char(subj);
+ if (ispunct(nextchar)) { // only ascii symbols and newline can be escaped
+ advance(subj);
+ return make_str(chunk_buf(subj->buffer, subj->pos - 1, 1));
+ } else if (nextchar == '\n') {
+ advance(subj);
+ return make_linebreak();
+ } else {
+ return make_str(chunk_literal("\\"));
+ }
}
// Parse an entity or a regular "&" string.
// Assumes the subject has an '&' character at the current position.
static inl* handle_entity(subject* subj)
{
- int match;
- inl * result;
- match = scan_entity(subj->buffer, subj->pos);
- if (match) {
- result = make_entity(bmidstr(subj->buffer, subj->pos, match));
- subj->pos += match;
- } else {
- advance(subj);
- result = make_str(bfromcstr("&"));
- }
- return result;
+ int match;
+ inl *result;
+ match = scan_entity(subj->buffer, subj->pos);
+ if (match) {
+ result = make_entity(chunk_buf(subj->buffer, subj->pos, match));
+ subj->pos += match;
+ } else {
+ advance(subj);
+ result = make_str(chunk_literal("&"));
+ }
+ return result;
}
// Like make_str, but parses entities.
// Returns an inline sequence consisting of str and entity elements.
-static inl * make_str_with_entities(bstring s)
-{
- inl * result = NULL;
- inl * new;
- int searchpos;
- char c;
- subject * subj = make_subject(s, NULL);
-
- while ((c = peek_char(subj))) {
- switch (c) {
- case '&':
- new = handle_entity(subj);
- break;
- default:
- searchpos = bstrchrp(subj->buffer, '&', subj->pos);
- if (searchpos == BSTR_ERR) {
- searchpos = blength(subj->buffer);
- }
- new = make_str(bmidstr(subj->buffer, subj->pos, searchpos - subj->pos));
- subj->pos = searchpos;
- }
- result = append_inlines(result, new);
- }
- free(subj);
- return result;
+static inl *make_str_with_entities(chunk *content)
+{
+ inl * result = NULL;
+ inl * new;
+ int searchpos, start;
+ char c;
+ subject subj;
+ gh_buf content_buf = GH_BUF_INIT;
+ gh_buf_set(&content_buf, content->data, content->len);
+ init_subject(&subj, &content_buf, 0, NULL);
+
+ while ((c = peek_char(&subj))) {
+ start = subj.pos;
+ switch (c) {
+ case '&':
+ new = handle_entity(&subj);
+ break;
+ default:
+ searchpos = gh_buf_strchr(subj.buffer, '&', subj.pos);
+ if (searchpos < 0) {
+ searchpos = gh_buf_len(subj.buffer);
+ }
+ new = make_str(chunk_buf(subj.buffer, subj.pos, searchpos - subj.pos));
+ subj.pos = searchpos;
+ }
+ // content_buf is a scratch copy freed below, so re-point each literal at the same
+ // bytes of the caller's chunk (offset `start`), which must outlive the result.
+ new->content.literal.data = content->data + start;
+ result = append_inlines(result, new);
+ }
+ gh_buf_free(&content_buf);
+ return result;
}
// Destructively unescape a string: remove backslashes before punctuation chars.
-extern int unescape(bstring url)
+extern void unescape_buffer(gh_buf *buf)
{
- // remove backslashes before punctuation chars:
- int searchpos = 0;
- while ((searchpos = bstrchrp(url, '\\', searchpos)) != BSTR_ERR) {
- if (ispunct(bchar(url, searchpos + 1))) {
- bdelete(url, searchpos, 1);
- } else {
- searchpos++;
- }
- }
- return 0;
+ int r, w;
+
+ for (r = 0, w = 0; r < buf->size; ++r) {
+ // Step over an escaping backslash so the escaped character is copied verbatim
+ // and never re-examined (an escaped backslash must not escape what follows it).
+ if (buf->ptr[r] == '\\' && r + 1 < buf->size && ispunct((unsigned char)buf->ptr[r + 1]))
+ r++;
+ buf->ptr[w++] = buf->ptr[r];
+ }
+ gh_buf_truncate(buf, w);
}
// Clean a URL: remove surrounding whitespace and surrounding <>,
// and remove \ that escape punctuation.
-static int clean_url(bstring url)
+static unsigned char *clean_url(chunk *url)
{
- // remove surrounding <> if any:
- int urllength = blength(url);
- btrimws(url);
- if (bchar(url, 0) == '<' && bchar(url, urllength - 1) == '>') {
- bdelete(url, 0, 1);
- bdelete(url, urllength - 2, 1);
- }
- unescape(url);
- return 0;
+ gh_buf buf = GH_BUF_INIT;
+
+ chunk_trim(url);
+ // strip <...> only when both brackets exist; an empty chunk must not be indexed
+ if (url->len >= 2 && url->data[0] == '<' && url->data[url->len - 1] == '>') {
+ gh_buf_set(&buf, url->data + 1, url->len - 2);
+ } else {
+ gh_buf_set(&buf, url->data, url->len);
+ }
+
+ unescape_buffer(&buf);
+ return gh_buf_detach(&buf);
}
// Clean a title: remove surrounding quotes and remove \ that escape punctuation.
-static int clean_title(bstring title)
+static unsigned char *clean_title(chunk *title)
{
- // remove surrounding quotes if any:
- int titlelength = blength(title);
- if ((bchar(title, 0) == '\'' && bchar(title, titlelength - 1) == '\'') ||
- (bchar(title, 0) == '(' && bchar(title, titlelength - 1) == ')') ||
- (bchar(title, 0) == '"' && bchar(title, titlelength - 1) == '"')) {
- bdelete(title, 0, 1);
- bdelete(title, titlelength - 2, 1);
- }
- unescape(title);
- return 0;
+ gh_buf buf = GH_BUF_INIT;
+ unsigned char first = title->len >= 2 ? title->data[0] : 0; // 0 never matches a quote
+ unsigned char last = title->len >= 2 ? title->data[title->len - 1] : 0;
+
+ // remove surrounding quotes if any (needs an opening AND a closing character):
+ if ((first == '\'' && last == '\'') ||
+ (first == '(' && last == ')') ||
+ (first == '"' && last == '"')) {
+ gh_buf_set(&buf, title->data + 1, title->len - 2);
+ } else {
+ gh_buf_set(&buf, title->data, title->len);
+ }
+
+ unescape_buffer(&buf);
+ return gh_buf_detach(&buf);
}
// Parse an autolink or HTML tag.
// Assumes the subject has a '<' character at the current position.
static inl* handle_pointy_brace(subject* subj)
{
- int matchlen = 0;
- bstring contents;
- inl* result;
-
- advance(subj); // advance past first <
- // first try to match a URL autolink
- matchlen = scan_autolink_uri(subj->buffer, subj->pos);
- if (matchlen > 0) {
- contents = bmidstr(subj->buffer, subj->pos, matchlen - 1);
- subj->pos += matchlen;
- result = make_link(make_str_with_entities(contents),
- bstrcpy(contents), bfromcstr(""));
- bdestroy(contents);
- return result;
- }
- // next try to match an email autolink
- matchlen = scan_autolink_email(subj->buffer, subj->pos);
- if (matchlen > 0) {
- contents = bmidstr(subj->buffer, subj->pos, matchlen - 1);
- subj->pos += matchlen;
- result = make_link(make_str_with_entities(contents),
- bformat("mailto:%s", contents->data),
- bfromcstr(""));
- bdestroy(contents);
- return result;
- }
- // finally, try to match an html tag
- matchlen = scan_html_tag(subj->buffer, subj->pos);
- if (matchlen > 0) {
- contents = bmidstr(subj->buffer, subj->pos, matchlen);
- binsertch(contents, 0, 1, '<');
- subj->pos += matchlen;
- return make_raw_html(contents);
- } else {// if nothing matches, just return the opening <:
- return make_str(bfromcstr("<"));
- }
+ int matchlen = 0;
+ chunk contents;
+
+ advance(subj); // advance past first <
+
+ // first try to match a URL autolink
+ matchlen = scan_autolink_uri(subj->buffer, subj->pos);
+ if (matchlen > 0) {
+ contents = chunk_buf(subj->buffer, subj->pos, matchlen - 1);
+ subj->pos += matchlen;
+
+ return make_link(
+ make_str_with_entities(&contents),
+ contents,
+ chunk_literal("")
+ );
+ }
+
+ // next try to match an email autolink
+ matchlen = scan_autolink_email(subj->buffer, subj->pos);
+ if (matchlen > 0) {
+ gh_buf mail_url = GH_BUF_INIT;
+ inl *result;
+
+ contents = chunk_buf(subj->buffer, subj->pos, matchlen - 1);
+ subj->pos += matchlen;
+
+ gh_buf_puts(&mail_url, "mailto:");
+ gh_buf_put(&mail_url, contents.data, contents.len);
+ // make_link copies the URL text, so lend it mail_url and release it afterwards
+ result = make_link(make_str_with_entities(&contents),
+ chunk_buf(&mail_url, 0, mail_url.size), chunk_literal(""));
+ gh_buf_free(&mail_url);
+ return result;
+ }
+
+ // finally, try to match an html tag
+ matchlen = scan_html_tag(subj->buffer, subj->pos);
+ if (matchlen > 0) {
+ contents = chunk_buf(subj->buffer, subj->pos - 1, matchlen + 1);
+ subj->pos += matchlen;
+ return make_raw_html(contents);
+ }
+
+ // if nothing matches, just return the opening <:
+ return make_str(chunk_literal("<"));
}
// Parse a link label. Returns 1 if successful.
@@ -641,366 +731,381 @@ static inl* handle_pointy_brace(subject* subj)
// markers. So, 2 below contains a link while 1 does not:
// 1. [a link `with a ](/url)` character
// 2. [a link *with emphasized ](/url) text*
-static int link_label(subject* subj, bstring* raw_label)
-{
- int nestlevel = 0;
- inl* tmp = NULL;
- bstring raw;
- int startpos = subj->pos;
- if (subj->label_nestlevel) {
- // if we've already checked to the end of the subject
- // for a label, even with a different starting [, we
- // know we won't find one here and we can just return.
- // Note: nestlevel 1 would be: [foo [bar]
- // nestlevel 2 would be: [foo [bar [baz]
- subj->label_nestlevel--;
- return 0;
- }
- advance(subj); // advance past [
- char c;
- while ((c = peek_char(subj)) && (c != ']' || nestlevel > 0)) {
- switch (c) {
- case '`':
- tmp = handle_backticks(subj);
- free_inlines(tmp);
- break;
- case '<':
- tmp = handle_pointy_brace(subj);
- free_inlines(tmp);
- break;
- case '[': // nested []
- nestlevel++;
- advance(subj);
- break;
- case ']': // nested []
- nestlevel--;
- advance(subj);
- break;
- case '\\':
- advance(subj);
- if (ispunct(peek_char(subj))) {
- advance(subj);
- }
- break;
- default:
- advance(subj);
- }
- }
- if (c == ']') {
- if (raw_label != NULL) {
- raw = bmidstr(subj->buffer, startpos + 1, subj->pos - (startpos + 1));
- *raw_label = raw;
- }
- subj->label_nestlevel = 0;
- advance(subj); // advance past ]
- return 1;
- } else {
- if (c == 0) {
- subj->label_nestlevel = nestlevel;
- }
- subj->pos = startpos; // rewind
- return 0;
- }
+static int link_label(subject* subj, chunk *raw_label)
+{
+ int nestlevel = 0;
+ inl* tmp = NULL;
+ int startpos = subj->pos;
+
+ if (subj->label_nestlevel) {
+ // if we've already checked to the end of the subject
+ // for a label, even with a different starting [, we
+ // know we won't find one here and we can just return.
+ // Note: nestlevel 1 would be: [foo [bar]
+ // nestlevel 2 would be: [foo [bar [baz]
+ subj->label_nestlevel--;
+ return 0;
+ }
+
+ advance(subj); // advance past [
+ char c;
+ while ((c = peek_char(subj)) && (c != ']' || nestlevel > 0)) {
+ switch (c) {
+ case '`':
+ tmp = handle_backticks(subj);
+ free_inlines(tmp);
+ break;
+ case '<':
+ tmp = handle_pointy_brace(subj);
+ free_inlines(tmp);
+ break;
+ case '[': // nested []
+ nestlevel++;
+ advance(subj);
+ break;
+ case ']': // nested []
+ nestlevel--;
+ advance(subj);
+ break;
+ case '\\':
+ advance(subj);
+ if (ispunct(peek_char(subj))) {
+ advance(subj);
+ }
+ break;
+ default:
+ advance(subj);
+ }
+ }
+ if (c == ']') {
+ *raw_label = chunk_buf(
+ subj->buffer,
+ startpos + 1,
+ subj->pos - (startpos + 1)
+ );
+
+ subj->label_nestlevel = 0;
+ advance(subj); // advance past ]
+ return 1;
+ } else {
+ if (c == 0) {
+ subj->label_nestlevel = nestlevel;
+ }
+ subj->pos = startpos; // rewind
+ return 0;
+ }
}
// Parse a link or the link portion of an image, or return a fallback.
static inl* handle_left_bracket(subject* subj)
{
- inl* lab = NULL;
- inl* result = NULL;
- reference* ref;
- int n;
- int sps;
- int found_label;
- int endlabel, starturl, endurl, starttitle, endtitle, endall;
- bstring url, title, rawlabel, reflabel;
- bstring rawlabel2 = NULL;
- found_label = link_label(subj, &rawlabel);
- endlabel = subj->pos;
- if (found_label) {
- if (peek_char(subj) == '(' &&
- ((sps = scan_spacechars(subj->buffer, subj->pos + 1)) > -1) &&
- ((n = scan_link_url(subj->buffer, subj->pos + 1 + sps)) > -1)) {
- // try to parse an explicit link:
- starturl = subj->pos + 1 + sps; // after (
- endurl = starturl + n;
- starttitle = endurl + scan_spacechars(subj->buffer, endurl);
- // ensure there are spaces btw url and title
- endtitle = (starttitle == endurl) ? starttitle :
- starttitle + scan_link_title(subj->buffer, starttitle);
- endall = endtitle + scan_spacechars(subj->buffer, endtitle);
- if (bchar(subj->buffer, endall) == ')') {
- subj->pos = endall + 1;
- url = bmidstr(subj->buffer, starturl, endurl - starturl);
- clean_url(url);
- title = bmidstr(subj->buffer, starttitle, endtitle - starttitle);
- clean_title(title);
- lab = parse_inlines(rawlabel, NULL);
- bdestroy(rawlabel);
- return make_link(lab, url, title);
- } else {
- // if we get here, we matched a label but didn't get further:
- subj->pos = endlabel;
- lab = parse_inlines(rawlabel, subj->reference_map);
- bdestroy(rawlabel);
- result = append_inlines(make_str(bfromcstr("[")),
- append_inlines(lab,
- make_str(bfromcstr("]"))));
- return result;
- }
- } else {
- // Check for reference link.
- // First, see if there's another label:
- subj->pos = subj->pos + scan_spacechars(subj->buffer, endlabel);
- reflabel = rawlabel;
- // if followed by a nonempty link label, we change reflabel to it:
- if (peek_char(subj) == '[' &&
- link_label(subj, &rawlabel2)) {
- if (blength(rawlabel2) > 0) {
- reflabel = rawlabel2;
- }
- } else {
- subj->pos = endlabel;
- }
- // lookup rawlabel in subject->reference_map:
- ref = lookup_reference(subj->reference_map, reflabel);
- if (ref != NULL) { // found
- lab = parse_inlines(rawlabel, NULL);
- result = make_link(lab, bstrcpy(ref->url), bstrcpy(ref->title));
- } else {
- subj->pos = endlabel;
- lab = parse_inlines(rawlabel, subj->reference_map);
- result = append_inlines(make_str(bfromcstr("[")),
- append_inlines(lab, make_str(bfromcstr("]"))));
- }
- bdestroy(rawlabel);
- bdestroy(rawlabel2);
- return result;
- }
- }
- // If we fall through to here, it means we didn't match a link:
- advance(subj); // advance past [
- return make_str(bfromcstr("["));
+ inl *lab = NULL;
+ inl *result = NULL;
+ reference *ref;
+ int n;
+ int sps;
+ int found_label;
+ int endlabel, starturl, endurl, starttitle, endtitle, endall;
+
+ chunk rawlabel;
+ chunk url, title;
+
+ found_label = link_label(subj, &rawlabel);
+ endlabel = subj->pos;
+
+ if (found_label) {
+ if (peek_char(subj) == '(' &&
+ ((sps = scan_spacechars(subj->buffer, subj->pos + 1)) > -1) &&
+ ((n = scan_link_url(subj->buffer, subj->pos + 1 + sps)) > -1)) {
+
+ // try to parse an explicit link:
+ starturl = subj->pos + 1 + sps; // after (
+ endurl = starturl + n;
+ starttitle = endurl + scan_spacechars(subj->buffer, endurl);
+
+ // ensure there are spaces btw url and title
+ endtitle = (starttitle == endurl) ? starttitle :
+ starttitle + scan_link_title(subj->buffer, starttitle);
+
+ endall = endtitle + scan_spacechars(subj->buffer, endtitle);
+
+ if (gh_buf_at(subj->buffer, endall) == ')') {
+ subj->pos = endall + 1;
+
+ url = chunk_buf(subj->buffer, starturl, endurl - starturl);
+ title = chunk_buf(subj->buffer, starttitle, endtitle - starttitle);
+ lab = parse_chunk_inlines(&rawlabel, NULL);
+
+ return make_link(lab, url, title);
+ } else {
+ // if we get here, we matched a label but didn't get further:
+ subj->pos = endlabel;
+ lab = parse_chunk_inlines(&rawlabel, subj->reference_map);
+ result = append_inlines(make_str(chunk_literal("[")),
+ append_inlines(lab,
+ make_str(chunk_literal("]"))));
+ return result;
+ }
+ } else {
+ chunk rawlabel_tmp;
+ chunk reflabel;
+
+ // Check for reference link.
+ // First, see if there's another label:
+ subj->pos = subj->pos + scan_spacechars(subj->buffer, endlabel);
+ reflabel = rawlabel;
+
+ // if followed by a nonempty link label, we change reflabel to it:
+ if (peek_char(subj) == '[' && link_label(subj, &rawlabel_tmp)) {
+ if (rawlabel_tmp.len > 0)
+ reflabel = rawlabel_tmp;
+ } else {
+ subj->pos = endlabel;
+ }
+
+ // lookup rawlabel in subject->reference_map:
+ ref = lookup_reference(subj->reference_map, &reflabel);
+ if (ref != NULL) { // found
+ lab = parse_chunk_inlines(&rawlabel, NULL);
+ result = make_link(lab, chunk_literal(ref->url), chunk_literal(ref->title));
+ } else {
+ subj->pos = endlabel;
+ lab = parse_chunk_inlines(&rawlabel, subj->reference_map);
+ result = append_inlines(make_str(chunk_literal("[")),
+ append_inlines(lab, make_str(chunk_literal("]"))));
+ }
+ return result;
+ }
+ }
+ // If we fall through to here, it means we didn't match a link:
+ advance(subj); // advance past [
+ return make_str(chunk_literal("["));
}
// Parse a hard or soft linebreak, returning an inline.
// Assumes the subject has a newline at the current position.
static inl* handle_newline(subject *subj)
{
- int nlpos = subj->pos;
- // skip over newline
- advance(subj);
- // skip spaces at beginning of line
- while (peek_char(subj) == ' ') {
- advance(subj);
- }
- if (nlpos > 1 &&
- bchar(subj->buffer, nlpos - 1) == ' ' &&
- bchar(subj->buffer, nlpos - 2) == ' ') {
- return make_linebreak();
- } else {
- return make_softbreak();
- }
+ int nlpos = subj->pos;
+ // skip over newline
+ advance(subj);
+ // skip spaces at beginning of line
+ while (peek_char(subj) == ' ') {
+ advance(subj);
+ }
+ if (nlpos > 1 &&
+ gh_buf_at(subj->buffer, nlpos - 1) == ' ' &&
+ gh_buf_at(subj->buffer, nlpos - 2) == ' ') {
+ return make_linebreak();
+ } else {
+ return make_softbreak();
+ }
}
inline static int not_eof(subject* subj)
{
- return !is_eof(subj);
+ return !is_eof(subj);
}
// Parse inlines while a predicate is satisfied. Return inlines.
extern inl* parse_inlines_while(subject* subj, int (*f)(subject*))
{
- inl* result = NULL;
- inl** last = &result;
- while ((*f)(subj) && parse_inline(subj, last)) {
- }
- return result;
+ inl* result = NULL;
+ inl** last = &result;
+ while ((*f)(subj) && parse_inline(subj, last)) {
+ }
+ return result;
+}
+
+inl *parse_chunk_inlines(chunk *chunk, reference** refmap)
+{
+ inl *result;
+ subject subj;
+ gh_buf full_chunk = GH_BUF_INIT;
+
+ gh_buf_set(&full_chunk, chunk->data, chunk->len);
+ init_subject(&subj, &full_chunk, 0, refmap);
+ result = parse_inlines_while(&subj, not_eof);
+
+ gh_buf_free(&full_chunk);
+ return result;
+}
+
+static int find_special_char(subject *subj)
+{
+ int n = subj->pos + 1;
+ int size = (int)gh_buf_len(subj->buffer);
+
+ while (n < size) {
+ if (strchr("\n\\`&_*[]<!", gh_buf_at(subj->buffer, n)))
+ return n;
+ }
+
+ return -1;
}
// Parse an inline, advancing subject, and add it to last element.
// Adjust tail to point to new last element of list.
// Return 0 if no inline can be parsed, 1 otherwise.
-extern int parse_inline(subject* subj, inl ** last)
-{
- inl* new = NULL;
- bstring contents;
- bstring special_chars;
- unsigned char c;
- int endpos;
- c = peek_char(subj);
- if (c == 0) {
- return 0;
- }
- switch(c){
- case '\n':
- new = handle_newline(subj);
- break;
- case '`':
- new = handle_backticks(subj);
- break;
- case '\\':
- new = handle_backslash(subj);
- break;
- case '&':
- new = handle_entity(subj);
- break;
- case '<':
- new = handle_pointy_brace(subj);
- break;
- case '_':
- if (subj->pos > 0 && (isalnum(bchar(subj->buffer, subj->pos - 1)) ||
- bchar(subj->buffer, subj->pos - 1) == '_')) {
- new = make_str(take_one(subj));
- } else {
- new = handle_strong_emph(subj, '_');
- }
- break;
- case '*':
- new = handle_strong_emph(subj, '*');
- break;
- case '[':
- new = handle_left_bracket(subj);
- break;
- case '!':
- advance(subj);
- if (peek_char(subj) == '[') {
- new = handle_left_bracket(subj);
- if (new != NULL && new->tag == link) {
- new->tag = image;
- } else {
- new = append_inlines(make_str(bfromcstr("!")), new);
- }
- } else {
- new = make_str(bfromcstr("!"));
- }
- break;
- default:
- // we read until we hit a special character
- special_chars = bfromcstr("\n\\`&_*[]<!");
- endpos = binchr(subj->buffer, subj->pos, special_chars);
- bdestroy(special_chars);
- if (endpos == subj->pos) {
- // current char is special: read a 1-character str
- contents = take_one(subj);
- } else if (endpos == BSTR_ERR) {
- // special char not found, take whole rest of buffer:
- endpos = subj->buffer->slen;
- contents = bmidstr(subj->buffer, subj->pos, endpos - subj->pos);
- subj->pos = endpos;
- } else {
- // take buffer from subj->pos to endpos to str.
- contents = bmidstr(subj->buffer, subj->pos, endpos - subj->pos);
- subj->pos = endpos;
- // if we're at a newline, strip trailing spaces.
- if (peek_char(subj) == '\n') {
- brtrimws(contents);
- }
- }
- new = make_str(contents);
- }
- if (*last == NULL) {
- *last = new;
- } else {
- append_inlines(*last, new);
- }
- return 1;
-}
-
-extern inl* parse_inlines(bstring input, reference** refmap)
-{
- subject * subj = make_subject(input, refmap);
- inl * result = parse_inlines_while(subj, not_eof);
- free(subj);
- return result;
+static int parse_inline(subject* subj, inl ** last)
+{
+ inl* new = NULL;
+ chunk contents;
+ unsigned char c;
+ int endpos;
+ c = peek_char(subj);
+ if (c == 0) {
+ return 0;
+ }
+ switch(c){
+ case '\n':
+ new = handle_newline(subj);
+ break;
+ case '`':
+ new = handle_backticks(subj);
+ break;
+ case '\\':
+ new = handle_backslash(subj);
+ break;
+ case '&':
+ new = handle_entity(subj);
+ break;
+ case '<':
+ new = handle_pointy_brace(subj);
+ break;
+ case '_':
+ if (subj->pos > 0 && (isalnum(gh_buf_at(subj->buffer, subj->pos - 1)) ||
+ gh_buf_at(subj->buffer, subj->pos - 1) == '_')) {
+ goto text_literal;
+ }
+
+ new = handle_strong_emph(subj, '_');
+ break;
+ case '*':
+ new = handle_strong_emph(subj, '*');
+ break;
+ case '[':
+ new = handle_left_bracket(subj);
+ break;
+ case '!':
+ advance(subj);
+ if (peek_char(subj) == '[') {
+ new = handle_left_bracket(subj);
+ if (new != NULL && new->tag == link) {
+ new->tag = image;
+ } else {
+ new = append_inlines(make_str(chunk_literal("!")), new);
+ }
+ } else {
+ new = make_str(chunk_literal("!"));
+ }
+ break;
+ default:
+ text_literal:
+ endpos = find_special_char(subj);
+ if (endpos < 0) {
+ endpos = gh_buf_len(subj->buffer);
+ }
+
+ contents = chunk_buf(subj->buffer, subj->pos, endpos - subj->pos);
+ subj->pos = endpos;
+
+ // if we're at a newline, strip trailing spaces.
+ if (peek_char(subj) == '\n') {
+ chunk_trim(&contents);
+ }
+
+ new = make_str(contents);
+ }
+ if (*last == NULL) {
+ *last = new;
+ } else {
+ append_inlines(*last, new);
+ }
+ return 1;
+}
+
+extern inl* parse_inlines(gh_buf *input, int input_pos, reference** refmap)
+{
+ subject subj;
+ init_subject(&subj, input, input_pos, refmap);
+ return parse_inlines_while(&subj, not_eof);
}
// Parse zero or more space characters, including at most one newline.
void spnl(subject* subj)
{
- bool seen_newline = false;
- while (peek_char(subj) == ' ' ||
- (!seen_newline &&
- (seen_newline = peek_char(subj) == '\n'))) {
- advance(subj);
- }
+ bool seen_newline = false;
+ while (peek_char(subj) == ' ' ||
+ (!seen_newline &&
+ (seen_newline = peek_char(subj) == '\n'))) {
+ advance(subj);
+ }
}
// Parse reference. Assumes string begins with '[' character.
// Modify refmap if a reference is encountered.
// Return 0 if no reference found, otherwise position of subject
// after reference is parsed.
-extern int parse_reference(bstring input, reference** refmap)
-{
- subject * subj = make_subject(input, NULL);
- bstring lab = NULL;
- bstring url = NULL;
- bstring title = NULL;
- int matchlen = 0;
- int beforetitle;
- reference * new = NULL;
- int newpos;
-
- // parse label:
- if (!link_label(subj, &lab)) {
- free(subj);
- return 0;
- }
- // colon:
- if (peek_char(subj) == ':') {
- advance(subj);
- } else {
- free(subj);
- bdestroy(lab);
- return 0;
- }
- // parse link url:
- spnl(subj);
- matchlen = scan_link_url(subj->buffer, subj->pos);
- if (matchlen) {
- url = bmidstr(subj->buffer, subj->pos, matchlen);
- clean_url(url);
- subj->pos += matchlen;
- } else {
- free(subj);
- bdestroy(lab);
- bdestroy(url);
- return 0;
- }
- // parse optional link_title
- beforetitle = subj->pos;
- spnl(subj);
- matchlen = scan_link_title(subj->buffer, subj->pos);
- if (matchlen) {
- title = bmidstr(subj->buffer, subj->pos, matchlen);
- clean_title(title);
- subj->pos += matchlen;
- } else {
- subj->pos = beforetitle;
- title = bfromcstr("");
- }
- // parse final spaces and newline:
- while (peek_char(subj) == ' ') {
- advance(subj);
- }
- if (peek_char(subj) == '\n') {
- advance(subj);
- } else if (peek_char(subj) != 0) {
- free(subj);
- bdestroy(lab);
- bdestroy(url);
- bdestroy(title);
- return 0;
- }
- // insert reference into refmap
- new = make_reference(lab, url, title);
- add_reference(refmap, new);
-
- newpos = subj->pos;
- free(subj);
- bdestroy(lab);
- bdestroy(url);
- bdestroy(title);
- return newpos;
+extern int parse_reference(gh_buf *input, int input_pos, reference** refmap)
+{
+ subject subj;
+
+ chunk lab;
+ chunk url;
+ chunk title;
+
+ int matchlen = 0;
+ int beforetitle;
+ reference * new = NULL;
+
+ init_subject(&subj, input, input_pos, NULL);
+
+ // parse label:
+ if (!link_label(&subj, &lab))
+ return 0;
+
+ // colon:
+ if (peek_char(&subj) == ':') {
+ advance(&subj);
+ } else {
+ return 0;
+ }
+
+ // parse link url:
+ spnl(&subj);
+ matchlen = scan_link_url(subj.buffer, subj.pos);
+ if (matchlen) {
+ url = chunk_buf(subj.buffer, subj.pos, matchlen);
+ subj.pos += matchlen;
+ } else {
+ return 0;
+ }
+
+ // parse optional link_title
+ beforetitle = subj.pos;
+ spnl(&subj);
+ matchlen = scan_link_title(subj.buffer, subj.pos);
+ if (matchlen) {
+ title = chunk_buf(subj.buffer, subj.pos, matchlen);
+ subj.pos += matchlen;
+ } else {
+ subj.pos = beforetitle;
+ title = chunk_literal("");
+ }
+ // parse final spaces and newline:
+ while (peek_char(&subj) == ' ') {
+ advance(&subj);
+ }
+ if (peek_char(&subj) == '\n') {
+ advance(&subj);
+ } else if (peek_char(&subj) != 0) {
+ return 0;
+ }
+ // insert reference into refmap
+ new = make_reference(&lab, &url, &title);
+ add_reference(refmap, new);
+
+ return subj.pos;
}
--
cgit v1.2.3
From 582674e662d1f8757350c51486a5e0a837195e15 Mon Sep 17 00:00:00 2001
From: Vicent Marti
Date: Tue, 2 Sep 2014 13:18:04 +0200
Subject: ffffix
---
Makefile | 11 ++-
src/blocks.c | 58 +++++++----
src/buffer.c | 69 +++++--------
src/buffer.h | 19 ++--
src/html.c | 276 ----------------------------------------------------
src/inlines.c | 4 +-
src/main.c | 142 ++++++++++++---------------
src/print.c | 307 ++++++++++++++++++++++++++++++----------------------------
src/stmd.h | 13 +--
src/utf8.c | 6 +-
src/utf8.h | 6 --
11 files changed, 304 insertions(+), 607 deletions(-)
delete mode 100644 src/html.c
delete mode 100644 src/utf8.h
(limited to 'src/inlines.c')
diff --git a/Makefile b/Makefile
index cb5938d..d14a928 100644
--- a/Makefile
+++ b/Makefile
@@ -6,7 +6,7 @@ DATADIR=data
PROG=./stmd
.PHONY: all oldtests test spec benchjs testjs
-all: $(SRCDIR)/case_fold_switch.c $(PROG)
+all: $(SRCDIR)/case_fold_switch.inc $(PROG)
README.html: README.md template.html
pandoc --template template.html -S -s -t html5 -o $@ $<
@@ -41,13 +41,16 @@ testjs: spec.txt
benchjs:
node js/bench.js ${BENCHINP}
-$(PROG): $(SRCDIR)/main.c $(SRCDIR)/inlines.o $(SRCDIR)/buffer.o $(SRCDIR)/blocks.o $(SRCDIR)/scanners.c $(SRCDIR)/print.o $(SRCDIR)/html.o $(SRCDIR)/utf8.o
+HTML_OBJ=$(SRCDIR)/html/html.o $(SRCDIR)/html/houdini_href_e.o $(SRCDIR)/html/houdini_html_e.o
+STMD_OBJ=$(SRCDIR)/inlines.o $(SRCDIR)/buffer.o $(SRCDIR)/blocks.o $(SRCDIR)/scanners.c $(SRCDIR)/print.o $(SRCDIR)/utf8.o
+
+$(PROG): $(SRCDIR)/main.c $(HTML_OBJ) $(STMD_OBJ)
$(CC) $(LDFLAGS) -o $@ $^
$(SRCDIR)/scanners.c: $(SRCDIR)/scanners.re
re2c --case-insensitive -bis $< > $@ || (rm $@ && false)
-$(SRCDIR)/case_fold_switch.inc $(DATADIR)/CaseFolding-3.2.0.txt
+$(SRCDIR)/case_fold_switch.inc: $(DATADIR)/CaseFolding-3.2.0.txt
perl mkcasefold.pl < $< > $@
.PHONY: leakcheck clean fuzztest dingus upload
@@ -72,7 +75,7 @@ update-site: spec.html narrative.html
(cd _site ; git pull ; git commit -a -m "Updated site for latest spec, narrative, js" ; git push; cd ..)
clean:
- -rm -f test $(SRCDIR)/*.o $(SRCDIR)/scanners.c
+ -rm -f test $(SRCDIR)/*.o $(SRCDIR)/scanners.c $(SRCDIR)/html/*.o
-rm -rf *.dSYM
-rm -f README.html
-rm -f spec.md fuzz.txt spec.html
diff --git a/src/blocks.c b/src/blocks.c
index eabac03..71dc830 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -3,11 +3,12 @@
#include
#include
#include
-#include "bstrlib.h"
+
#include "stmd.h"
-#include "uthash.h"
-#include "debug.h"
#include "scanners.h"
+#include "uthash.h"
+
+static void finalize(block* b, int line_number);
static block* make_block(int tag, int start_line, int start_column)
{
@@ -140,7 +141,7 @@ static int break_out_of_lists(block ** bptr, int line_number)
}
-extern void finalize(block* b, int line_number)
+static void finalize(block* b, int line_number)
{
int firstlinelen;
int pos;
@@ -364,7 +365,7 @@ static int lists_match(struct ListData list_data,
list_data.bullet_char == item_data.bullet_char);
}
-static void expand_tabs(gh_buf *ob, const char *line, size_t size)
+static void expand_tabs(gh_buf *ob, const unsigned char *line, size_t size)
{
size_t i = 0, tab = 0;
@@ -389,13 +390,43 @@ static void expand_tabs(gh_buf *ob, const char *line, size_t size)
}
}
-extern block *stmd_parse_document(const char *buffer, size_t len)
+static block *finalize_parsing(block *document, int linenum)
{
- gh_buf line = GH_BUF_INIT;
+ while (document != document->top) {
+ finalize(document, linenum);
+ document = document->parent;
+ }
+
+ finalize(document, linenum);
+ process_inlines(document, document->attributes.refmap);
+
+ return document;
+}
+extern block *stmd_parse_file(FILE *f)
+{
+ gh_buf line = GH_BUF_INIT;
+ unsigned char buffer[4096];
+ int linenum = 1;
block *document = make_document();
+
+ while (fgets((char *)buffer, sizeof(buffer), f)) {
+ expand_tabs(&line, buffer, strlen(buffer));
+ incorporate_line(&line, linenum, &document);
+ gh_buf_clear(&line);
+ linenum++;
+ }
+
+ gh_buf_free(&line);
+ return finalize_document(document, linenum);
+}
+
+extern block *stmd_parse_document(const unsigned char *buffer, size_t len)
+{
+ gh_buf line = GH_BUF_INIT;
int linenum = 1;
- const char *end = buffer + len;
+ const unsigned char *end = buffer + len;
+ block *document = make_document();
while (buffer < end) {
const char *eol = memchr(buffer, '\n', end - buffer);
@@ -414,16 +445,7 @@ extern block *stmd_parse_document(const char *buffer, size_t len)
}
gh_buf_free(&line);
-
- while (document != document->top) {
- finalize(document, linenum);
- document = document->parent;
- }
-
- finalize(document, linenum);
- process_inlines(document, document->attributes.refmap);
-
- return document;
+ return finalize_document(document, linenum);
}
// Process one line at a time, modifying a block.
diff --git a/src/buffer.c b/src/buffer.c
index b81e7fa..17dc864 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -31,10 +31,10 @@ void gh_buf_init(gh_buf *buf, int initial_size)
int gh_buf_try_grow(gh_buf *buf, int target_size, bool mark_oom)
{
- char *new_ptr;
- size_t new_size;
+ unsigned char *new_ptr;
+ int new_size;
- if (buf->ptr == gh_buf__oom || buf->asize < 0)
+ if (buf->ptr == gh_buf__oom)
return -1;
if (target_size <= buf->asize)
@@ -79,7 +79,7 @@ void gh_buf_free(gh_buf *buf)
{
if (!buf) return;
- if (buf->asize > 0 && buf->ptr != gh_buf__initbuf && buf->ptr != gh_buf__oom)
+ if (buf->ptr != gh_buf__initbuf && buf->ptr != gh_buf__oom)
free(buf->ptr);
gh_buf_init(buf, 0);
@@ -91,14 +91,9 @@ void gh_buf_clear(gh_buf *buf)
if (buf->asize > 0)
buf->ptr[0] = '\0';
-
- if (buf->asize < 0) {
- buf->ptr = gh_buf__initbuf;
- buf->asize = 0;
- }
}
-int gh_buf_set(gh_buf *buf, const char *data, int len)
+int gh_buf_set(gh_buf *buf, const unsigned char *data, int len)
{
if (len == 0 || data == NULL) {
gh_buf_clear(buf);
@@ -115,10 +110,12 @@ int gh_buf_set(gh_buf *buf, const char *data, int len)
int gh_buf_sets(gh_buf *buf, const char *string)
{
- return gh_buf_set(buf, string, string ? strlen(string) : 0);
+ return gh_buf_set(buf,
+ (const unsigned char *)string,
+ string ? strlen(string) : 0);
}
-int gh_buf_putc(gh_buf *buf, char c)
+int gh_buf_putc(gh_buf *buf, int c)
{
ENSURE_SIZE(buf, buf->size + 2);
buf->ptr[buf->size++] = c;
@@ -126,7 +123,7 @@ int gh_buf_putc(gh_buf *buf, char c)
return 0;
}
-int gh_buf_put(gh_buf *buf, const char *data, int len)
+int gh_buf_put(gh_buf *buf, const unsigned char *data, int len)
{
ENSURE_SIZE(buf, buf->size + len + 1);
memmove(buf->ptr + buf->size, data, len);
@@ -137,8 +134,7 @@ int gh_buf_put(gh_buf *buf, const char *data, int len)
int gh_buf_puts(gh_buf *buf, const char *string)
{
- assert(string);
- return gh_buf_put(buf, string, strlen(string));
+ return gh_buf_put(buf, (const unsigned char *)string, strlen(string));
}
int gh_buf_vprintf(gh_buf *buf, const char *format, va_list ap)
@@ -153,7 +149,7 @@ int gh_buf_vprintf(gh_buf *buf, const char *format, va_list ap)
va_copy(args, ap);
len = vsnprintf(
- buf->ptr + buf->size,
+ (char *)buf->ptr + buf->size,
buf->asize - buf->size,
format, args
);
@@ -187,9 +183,9 @@ int gh_buf_printf(gh_buf *buf, const char *format, ...)
return r;
}
-void gh_buf_copy_cstr(char *data, size_t datasize, const gh_buf *buf)
+void gh_buf_copy_cstr(char *data, int datasize, const gh_buf *buf)
{
- size_t copylen;
+ int copylen;
assert(data && datasize && buf);
@@ -212,9 +208,9 @@ void gh_buf_swap(gh_buf *buf_a, gh_buf *buf_b)
*buf_b = t;
}
-char *gh_buf_detach(gh_buf *buf)
+unsigned char *gh_buf_detach(gh_buf *buf)
{
- char *data = buf->ptr;
+ unsigned char *data = buf->ptr;
if (buf->asize == 0 || buf->ptr == gh_buf__oom)
return NULL;
@@ -224,13 +220,13 @@ char *gh_buf_detach(gh_buf *buf)
return data;
}
-void gh_buf_attach(gh_buf *buf, char *ptr, int asize)
+void gh_buf_attach(gh_buf *buf, unsigned char *ptr, int asize)
{
gh_buf_free(buf);
if (ptr) {
buf->ptr = ptr;
- buf->size = strlen(ptr);
+ buf->size = strlen((char *)ptr);
if (asize)
buf->asize = (asize < buf->size) ? buf->size + 1 : asize;
else /* pass 0 to fall back on strlen + 1 */
@@ -249,11 +245,11 @@ int gh_buf_cmp(const gh_buf *a, const gh_buf *b)
int gh_buf_strchr(const gh_buf *buf, int c, int pos)
{
- const char *p = memchr(buf->ptr + pos, c, buf->size - pos);
- if (!p)
- return -1;
+ const char *p = memchr(buf->ptr + pos, c, buf->size - pos);
+ if (!p)
+ return -1;
- return (int)(p - p->ptr);
+ return (int)(p - buf->ptr);
}
int gh_buf_strrchr(const gh_buf *buf, int c, int pos)
@@ -270,36 +266,21 @@ int gh_buf_strrchr(const gh_buf *buf, int c, int pos)
void gh_buf_truncate(gh_buf *buf, size_t len)
{
- assert(buf->asize >= 0);
-
if (len < buf->size) {
buf->size = len;
buf->ptr[buf->size] = '\0';
}
}
-void gh_buf_ltruncate(gh_buf *buf, size_t len)
-{
- assert(buf->asize >= 0);
-
- if (len && len < buf->size) {
- memmove(buf->ptr, buf->ptr + len, buf->size - len);
- buf->size -= len;
- buf->ptr[buf->size] = '\0';
- }
-}
-
void gh_buf_trim(gh_buf *buf)
{
- size_t i = 0;
-
- assert(buf->asize >= 0);
-
- /* ltrim */
+ /* TODO: leading whitespace? */
+ /*
while (i < buf->size && isspace(buf->ptr[i]))
i++;
gh_buf_truncate(buf, i);
+ */
/* rtrim */
while (buf->size > 0) {
diff --git a/src/buffer.h b/src/buffer.h
index 2581ee3..422ef02 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -24,13 +24,6 @@ extern unsigned char gh_buf__oom[];
*/
extern void gh_buf_init(gh_buf *buf, int initial_size);
-static inline void gh_buf_static(gh_buf *buf, unsigned char *source)
-{
- buf->ptr = source;
- buf->size = strlen(source);
- buf->asize = -1;
-}
-
/**
* Attempt to grow the buffer to hold at least `target_size` bytes.
*
@@ -81,13 +74,13 @@ static inline size_t gh_buf_len(const gh_buf *buf)
extern int gh_buf_cmp(const gh_buf *a, const gh_buf *b);
-extern void gh_buf_attach(gh_buf *buf, char *ptr, int asize);
-extern char *gh_buf_detach(gh_buf *buf);
+extern void gh_buf_attach(gh_buf *buf, unsigned char *ptr, int asize);
+extern unsigned char *gh_buf_detach(gh_buf *buf);
extern void gh_buf_copy_cstr(char *data, int datasize, const gh_buf *buf);
static inline const char *gh_buf_cstr(const gh_buf *buf)
{
- return buf->ptr;
+ return (char *)buf->ptr;
}
#define gh_buf_at(buf, n) ((buf)->ptr[n])
@@ -100,10 +93,10 @@ static inline const char *gh_buf_cstr(const gh_buf *buf)
* return code of these functions and call them in a series then just call
* gh_buf_oom at the end.
*/
-extern int gh_buf_set(gh_buf *buf, const char *data, int len);
+extern int gh_buf_set(gh_buf *buf, const unsigned char *data, int len);
extern int gh_buf_sets(gh_buf *buf, const char *string);
-extern int gh_buf_putc(gh_buf *buf, char c);
-extern int gh_buf_put(gh_buf *buf, const char *data, int len);
+extern int gh_buf_putc(gh_buf *buf, int c);
+extern int gh_buf_put(gh_buf *buf, const unsigned char *data, int len);
extern int gh_buf_puts(gh_buf *buf, const char *string);
extern int gh_buf_printf(gh_buf *buf, const char *format, ...)
__attribute__((format (printf, 2, 3)));
diff --git a/src/html.c b/src/html.c
deleted file mode 100644
index aeec5f1..0000000
--- a/src/html.c
+++ /dev/null
@@ -1,276 +0,0 @@
-#include
-#include
-#include
-#include "bstrlib.h"
-#include "stmd.h"
-#include "debug.h"
-#include "scanners.h"
-
-// Functions to convert block and inline lists to HTML strings.
-
-// Escape special characters in HTML. More efficient than
-// three calls to bfindreplace. If preserve_entities is set,
-// existing entities are left alone.
-static bstring escape_html(bstring inp, bool preserve_entities)
-{
- int pos = 0;
- int match;
- char c;
- bstring escapable = blk2bstr("&<>\"", 4);
- bstring ent;
- bstring s = bstrcpy(inp);
- while ((pos = binchr(s, pos, escapable)) != BSTR_ERR) {
- c = bchar(s,pos);
- switch (c) {
- case '<':
- bdelete(s, pos, 1);
- ent = blk2bstr("&lt;", 4);
- binsert(s, pos, ent, ' ');
- bdestroy(ent);
- pos += 4;
- break;
- case '>':
- bdelete(s, pos, 1);
- ent = blk2bstr("&gt;", 4);
- binsert(s, pos, ent, ' ');
- bdestroy(ent);
- pos += 4;
- break;
- case '&':
- if (preserve_entities && (match = scan_entity(s, pos))) {
- pos += match;
- } else {
- bdelete(s, pos, 1);
- ent = blk2bstr("&amp;", 5);
- binsert(s, pos, ent, ' ');
- bdestroy(ent);
- pos += 5;
- }
- break;
- case '"':
- bdelete(s, pos, 1);
- ent = blk2bstr("&quot;", 6);
- binsert(s, pos, ent, ' ');
- bdestroy(ent);
- pos += 6;
- break;
- default:
- bdelete(s, pos, 1);
- log_err("unexpected character %02x", c);
- }
- }
- bdestroy(escapable);
- return s;
-}
-
-static inline void cr(bstring buffer)
-{
- int c = bchar(buffer, blength(buffer) - 1);
- if (c != '\n' && c) {
- bconchar(buffer, '\n');
- }
-}
-
-// Convert a block list to HTML. Returns 0 on success, and sets result.
-extern int blocks_to_html(block* b, bstring* result, bool tight)
-{
- bstring contents = NULL;
- bstring escaped, escaped2;
- struct bstrList * info_words;
- struct ListData * data;
- bstring mbstart;
- bstring html = blk2bstr("", 0);
-
- while(b != NULL) {
- switch(b->tag) {
- case document:
- check(blocks_to_html(b->children, &contents, false) == 0,
- "error converting blocks to html");
- bformata(html, "%s", contents->data);
- bdestroy(contents);
- break;
- case paragraph:
- check(inlines_to_html(b->inline_content, &contents) == 0,
- "error converting inlines to html");
- if (tight) {
- bformata(html, "%s", contents->data);
- } else {
- cr(html);
- bformata(html, "<p>%s</p>", contents->data);
- cr(html);
- }
- bdestroy(contents);
- break;
- case block_quote:
- check(blocks_to_html(b->children, &contents, false) == 0,
- "error converting blocks to html");
- cr(html);
- bformata(html, "<blockquote>\n%s</blockquote>", contents->data);
- cr(html);
- bdestroy(contents);
- break;
- case list_item:
- check(blocks_to_html(b->children, &contents, tight) == 0,
- "error converting blocks to html");
- brtrimws(contents);
- cr(html);
- bformata(html, "<li>%s</li>", contents->data);
- cr(html);
- bdestroy(contents);
- break;
- case list:
- // make sure a list starts at the beginning of the line:
- cr(html);
- data = &(b->attributes.list_data);
- check(blocks_to_html(b->children, &contents, data->tight) == 0,
- "error converting blocks to html");
- mbstart = bformat(" start=\"%d\"", data->start);
- bformata(html, "<%s%s>\n%s</%s>",
- data->list_type == bullet ? "ul" : "ol",
- data->start == 1 ? "" : (char*) mbstart->data,
- contents->data,
- data->list_type == bullet ? "ul" : "ol");
- cr(html);
- bdestroy(contents);
- bdestroy(mbstart);
- break;
- case atx_header:
- case setext_header:
- check(inlines_to_html(b->inline_content, &contents) == 0,
- "error converting inlines to html");
- cr(html);
- bformata(html, "<h%d>%s</h%d>",
- b->attributes.header_level,
- contents->data,
- b->attributes.header_level);
- cr(html);
- bdestroy(contents);
- break;
- case indented_code:
- escaped = escape_html(b->string_content, false);
- cr(html);
- bformata(html, "<pre><code>%s</code></pre>", escaped->data);
- cr(html);
- bdestroy(escaped);
- break;
- case fenced_code:
- escaped = escape_html(b->string_content, false);
- cr(html);
- bformata(html, "<pre><code");
- if (blength(b->attributes.fenced_code_data.info) > 0) {
- escaped2 = escape_html(b->attributes.fenced_code_data.info, true);
- info_words = bsplit(escaped2, ' ');
- bformata(html, " class=\"language-%s\"", info_words->entry[0]->data);
- bdestroy(escaped2);
- bstrListDestroy(info_words);
- }
- bformata(html, ">%s</code></pre>", escaped->data);
- cr(html);
- bdestroy(escaped);
- break;
- case html_block:
- bformata(html, "%s", b->string_content->data);
- break;
- case hrule:
- bformata(html, "<hr />");
- cr(html);
- break;
- case reference_def:
- break;
- default:
- log_warn("block type %d not implemented\n", b->tag);
- break;
- }
- b = b->next;
- }
- *result = html;
- return 0;
- error:
- return -1;
-}
-
-// Convert an inline list to HTML. Returns 0 on success, and sets result.
-extern int inlines_to_html(inl* ils, bstring* result)
-{
- bstring contents = NULL;
- bstring html = blk2bstr("", 0);
- bstring mbtitle, escaped, escaped2;
-
- while(ils != NULL) {
- switch(ils->tag) {
- case str:
- escaped = escape_html(ils->content.literal, false);
- bformata(html, "%s", escaped->data);
- bdestroy(escaped);
- break;
- case linebreak:
- bformata(html, "<br />\n");
- break;
- case softbreak:
- bformata(html, "\n");
- break;
- case code:
- escaped = escape_html(ils->content.literal, false);
- bformata(html, "<code>%s</code>", escaped->data);
- bdestroy(escaped);
- break;
- case raw_html:
- case entity:
- bformata(html, "%s", ils->content.literal->data);
- break;
- case link:
- check(inlines_to_html(ils->content.inlines, &contents) == 0,
- "error converting inlines to html");
- if (blength(ils->content.linkable.title) > 0) {
- escaped = escape_html(ils->content.linkable.title, true);
- mbtitle = bformat(" title=\"%s\"", escaped->data);
- bdestroy(escaped);
- } else {
- mbtitle = blk2bstr("",0);
- }
- escaped = escape_html(ils->content.linkable.url, true);
- bformata(html, "<a href=\"%s\"%s>%s</a>",
- escaped->data,
- mbtitle->data,
- contents->data);
- bdestroy(escaped);
- bdestroy(mbtitle);
- bdestroy(contents);
- break;
- case image:
- check(inlines_to_html(ils->content.inlines, &contents) == 0,
- "error converting inlines to html");
- escaped = escape_html(ils->content.linkable.url, true);
- escaped2 = escape_html(contents, false);
- bdestroy(contents);
- bformata(html, "<img src=\"%s\" alt=\"%s\"",
- escaped->data, escaped2->data);
- bdestroy(escaped);
- bdestroy(escaped2);
- if (blength(ils->content.linkable.title) > 0) {
- escaped = escape_html(ils->content.linkable.title, true);
- bformata(html, " title=\"%s\"", escaped->data);
- bdestroy(escaped);
- }
- bformata(html, " />");
- break;
- case strong:
- check(inlines_to_html(ils->content.inlines, &contents) == 0,
- "error converting inlines to html");
- bformata(html, "<strong>%s</strong>", contents->data);
- bdestroy(contents);
- break;
- case emph:
- check(inlines_to_html(ils->content.inlines, &contents) == 0,
- "error converting inlines to html");
- bformata(html, "<em>%s</em>", contents->data);
- bdestroy(contents);
- break;
- }
- ils = ils->next;
- }
- *result = html;
- return 0;
- error:
- return -1;
-}
diff --git a/src/inlines.c b/src/inlines.c
index 4ff45ad..82c7219 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -6,9 +6,7 @@
#include "stmd.h"
#include "uthash.h"
-#include "debug.h"
#include "scanners.h"
-#include "utf8.h"
typedef struct Subject {
const gh_buf *buffer;
@@ -119,7 +117,7 @@ inline static inl* make_linkable(int t, inl* label, chunk url, chunk title)
e->tag = t;
e->content.linkable.label = label;
e->content.linkable.url = chunk_to_cstr(&url);
- e->content.linkable.title = chunk_to_cstr(&title);
+ e->content.linkable.title = url.len ? chunk_to_cstr(&title) : NULL;
e->next = NULL;
return e;
}
diff --git a/src/main.c b/src/main.c
index 9e0a3c8..e1abedc 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,99 +1,77 @@
#include
#include
-#include "bstrlib.h"
+#include
#include "stmd.h"
#include "debug.h"
void print_usage()
{
- printf("Usage: stmd [FILE*]\n");
- printf("Options: --help, -h Print usage information\n");
- printf(" --ast Print AST instead of HTML\n");
- printf(" --version Print version\n");
+ printf("Usage: stmd [FILE*]\n");
+ printf("Options: --help, -h Print usage information\n");
+ printf(" --ast Print AST instead of HTML\n");
+ printf(" --version Print version\n");
}
-int main(int argc, char *argv[]) {
- int i;
- bool ast = false;
- int g = 0;
- int numfps = 0;
- int files[argc];
+static void print_document(block *document, bool ast)
+{
+ gh_buf html = GH_BUF_INIT;
+
+ if (ast) {
+ print_blocks(document, 0);
+ } else {
+ blocks_to_html(&html, document, false);
+ printf("%s", html.ptr);
+ gh_buf_free(&html);
+ }
+}
- for (i=1; i < argc; i++) {
- if (strcmp(argv[i], "--version") == 0) {
- printf("stmd %s", VERSION);
- printf(" - CommonMark converter (c) 2014 John MacFarlane\n");
- exit(0);
- } else if ((strcmp(argv[i], "--help") == 0) ||
- (strcmp(argv[i], "-h") == 0)) {
- print_usage();
- exit(0);
- } else if (strcmp(argv[i], "--ast") == 0) {
- ast = true;
- } else if (*argv[i] == '-') {
- print_usage();
- exit(1);
- } else { // treat as file argument
- files[g] = i;
- g++;
- }
- }
+int main(int argc, char *argv[])
+{
+ int i, numfps = 0;
+ bool ast = false;
+ int files[argc];
+ block *document = NULL;
- numfps = g;
- bstring s = NULL;
- bstring html;
- g = 0;
- block * cur = make_document();
- int linenum = 1;
- extern int errno;
- FILE * fp = NULL;
+ for (i = 1; i < argc; i++) {
+ if (strcmp(argv[i], "--version") == 0) {
+ printf("stmd %s", VERSION);
+ printf(" - CommonMark converter (c) 2014 John MacFarlane\n");
+ exit(0);
+ } else if ((strcmp(argv[i], "--help") == 0) ||
+ (strcmp(argv[i], "-h") == 0)) {
+ print_usage();
+ exit(0);
+ } else if (strcmp(argv[i], "--ast") == 0) {
+ ast = true;
+ } else if (*argv[i] == '-') {
+ print_usage();
+ exit(1);
+ } else { // treat as file argument
+ files[numfps++] = i;
+ }
+ }
- if (numfps == 0) {
- // read from stdin
- while ((s = bgets((bNgetc) fgetc, stdin, '\n'))) {
- check(incorporate_line(s, linenum, &cur) == 0,
- "error incorporating line %d", linenum);
- bdestroy(s);
- linenum++;
- }
- } else {
- // iterate over input file pointers
- for (g=0; g < numfps; g++) {
+ if (numfps == 0) {
+ document = stmd_parse_file(stdin);
+ print_document(document, ast);
+ free_blocks(document);
+ } else {
+ for (i = 0; i < numfps; i++) {
+ FILE *fp = fopen(argv[files[i]], "r");
- fp = fopen(argv[files[g]], "r");
- if (fp == NULL) {
- fprintf(stderr, "Error opening file %s: %s\n",
- argv[files[g]], strerror(errno));
- exit(1);
- }
+ if (fp == NULL) {
+ fprintf(stderr, "Error opening file %s: %s\n",
+ argv[files[i]], strerror(errno));
+ exit(1);
+ }
- while ((s = bgets((bNgetc) fgetc, fp, '\n'))) {
- check(incorporate_line(s, linenum, &cur) == 0,
- "error incorporating line %d", linenum);
- bdestroy(s);
- linenum++;
- }
- fclose(fp);
- }
- }
+ document = stmd_parse_file(fp);
+ print_document(document, ast);
+ free_blocks(document);
+ fclose(fp);
+ }
+ }
- while (cur != cur->top) {
- finalize(cur, linenum);
- cur = cur->parent;
- }
- check(cur == cur->top, "problems finalizing open containers");
- finalize(cur, linenum);
- process_inlines(cur, cur->attributes.refmap);
- if (ast) {
- print_blocks(cur, 0);
- } else {
- check(blocks_to_html(cur, &html, false) == 0, "could not format as HTML");
- // printf("%s", html->data);
- bdestroy(html);
- }
- free_blocks(cur);
- return 0;
-error:
- return -1;
+ return 0;
}
diff --git a/src/print.c b/src/print.c
index a924870..3ebde16 100644
--- a/src/print.c
+++ b/src/print.c
@@ -1,168 +1,175 @@
#include
#include
-#include "bstrlib.h"
+#include
#include "stmd.h"
#include "debug.h"
-static bstring format_str(bstring s)
+static void print_str(const unsigned char *s, int len)
{
- int pos = 0;
- int len = blength(s);
- bstring result = bfromcstr("");
- char c;
- bformata(result, "\"");
- while (pos < len) {
- c = bchar(s, pos);
- switch (c) {
- case '\n':
- bformata(result, "\\n");
- break;
- case '"':
- bformata(result, "\\\"");
- break;
- case '\\':
- bformata(result, "\\\\");
- break;
- default:
- bformata(result, "%c", c);
- }
- pos++;
- }
- bformata(result, "\"");
- return result;
+ int i;
+
+ if (len < 0)
+ len = strlen(s);
+
+ putchar('"');
+ for (i = 0; i < len; ++i) {
+ unsigned char c = s[i];
+
+ switch (c) {
+ case '\n':
+ printf("\\n");
+ break;
+ case '"':
+ printf("\\\"");
+ break;
+ case '\\':
+ printf("\\\\");
+ break;
+ default:
+ putchar((int)c);
+ }
+ }
+ putchar('"');
}
// Functions to pretty-print inline and block lists, for debugging.
// Prettyprint an inline list, for debugging.
extern void print_blocks(block* b, int indent)
{
- struct ListData * data;
- while(b != NULL) {
- // printf("%3d %3d %3d| ", b->start_line, b->start_column, b->end_line);
- for (int i=0; i < indent; i++) {
- putchar(' ');
- }
- switch(b->tag) {
- case document:
- printf("document\n");
- print_blocks(b->children, indent + 2);
- break;
- case block_quote:
- printf("block_quote\n");
- print_blocks(b->children, indent + 2);
- break;
- case list_item:
- data = &(b->attributes.list_data);
- printf("list_item\n");
- print_blocks(b->children, indent + 2);
- break;
- case list:
- data = &(b->attributes.list_data);
- if (data->list_type == ordered) {
- printf("list (type=ordered tight=%s start=%d delim=%s)\n",
- (data->tight ? "true" : "false"),
- data->start,
- (data->delimiter == parens ? "parens" : "period"));
- } else {
- printf("list (type=bullet tight=%s bullet_char=%c)\n",
- (data->tight ? "true" : "false"),
- data->bullet_char);
- }
- print_blocks(b->children, indent + 2);
- break;
- case atx_header:
- printf("atx_header (level=%d)\n", b->attributes.header_level);
- print_inlines(b->inline_content, indent + 2);
- break;
- case setext_header:
- printf("setext_header (level=%d)\n", b->attributes.header_level);
- print_inlines(b->inline_content, indent + 2);
- break;
- case paragraph:
- printf("paragraph\n");
- print_inlines(b->inline_content, indent + 2);
- break;
- case hrule:
- printf("hrule\n");
- break;
- case indented_code:
- printf("indented_code %s\n", format_str(b->string_content)->data);
- break;
- case fenced_code:
- printf("fenced_code length=%d info=%s %s\n",
- b->attributes.fenced_code_data.fence_length,
- format_str(b->attributes.fenced_code_data.info)->data,
- format_str(b->string_content)->data);
- break;
- case html_block:
- printf("html_block %s\n", format_str(b->string_content)->data);
- break;
- case reference_def:
- printf("reference_def\n");
- break;
- default:
- log_warn("block type %d not implemented\n", b->tag);
- break;
- }
- b = b->next;
- }
+ struct ListData *data;
+
+ while(b != NULL) {
+ // printf("%3d %3d %3d| ", b->start_line, b->start_column, b->end_line);
+ for (int i=0; i < indent; i++) {
+ putchar(' ');
+ }
+
+ switch(b->tag) {
+ case document:
+ printf("document\n");
+ print_blocks(b->children, indent + 2);
+ break;
+ case block_quote:
+ printf("block_quote\n");
+ print_blocks(b->children, indent + 2);
+ break;
+ case list_item:
+ data = &(b->attributes.list_data);
+ printf("list_item\n");
+ print_blocks(b->children, indent + 2);
+ break;
+ case list:
+ data = &(b->attributes.list_data);
+ if (data->list_type == ordered) {
+ printf("list (type=ordered tight=%s start=%d delim=%s)\n",
+ (data->tight ? "true" : "false"),
+ data->start,
+ (data->delimiter == parens ? "parens" : "period"));
+ } else {
+ printf("list (type=bullet tight=%s bullet_char=%c)\n",
+ (data->tight ? "true" : "false"),
+ data->bullet_char);
+ }
+ print_blocks(b->children, indent + 2);
+ break;
+ case atx_header:
+ printf("atx_header (level=%d)\n", b->attributes.header_level);
+ print_inlines(b->inline_content, indent + 2);
+ break;
+ case setext_header:
+ printf("setext_header (level=%d)\n", b->attributes.header_level);
+ print_inlines(b->inline_content, indent + 2);
+ break;
+ case paragraph:
+ printf("paragraph\n");
+ print_inlines(b->inline_content, indent + 2);
+ break;
+ case hrule:
+ printf("hrule\n");
+ break;
+ case indented_code:
+ printf("indented_code ");
+ print_str(b->string_content.ptr, -1);
+ putchar('\n');
+ break;
+ case fenced_code:
+ printf("fenced_code length=%d info=",
+ b->attributes.fenced_code_data.fence_length);
+ print_str(b->attributes.fenced_code_data.info.ptr, -1);
+ putchar(' ');
+ print_str(b->string_content.ptr, -1);
+ putchar('\n');
+ break;
+ case html_block:
+ printf("html_block ");
+ print_str(b->string_content.ptr, -1);
+ putchar('\n');
+ break;
+ case reference_def:
+ printf("reference_def\n");
+ break;
+ default:
+ printf("# NOT IMPLEMENTED (%d)\n", b->tag);
+ break;
+ }
+ b = b->next;
+ }
}
// Prettyprint an inline list, for debugging.
extern void print_inlines(inl* ils, int indent)
{
- while(ils != NULL) {
- /*
- // we add 11 extra spaces for the line/column info
- for (int i=0; i < 11; i++) {
- putchar(' ');
- }
- putchar('|');
- putchar(' ');
- */
- for (int i=0; i < indent; i++) {
- putchar(' ');
- }
- switch(ils->tag) {
- case str:
- printf("str %s\n", format_str(ils->content.literal)->data);
- break;
- case linebreak:
- printf("linebreak\n");
- break;
- case softbreak:
- printf("softbreak\n");
- break;
- case code:
- printf("code %s\n", format_str(ils->content.literal)->data);
- break;
- case raw_html:
- printf("html %s\n", format_str(ils->content.literal)->data);
- break;
- case entity:
- printf("entity %s\n", format_str(ils->content.literal)->data);
- break;
- case link:
- printf("link url=%s title=%s\n",
- format_str(ils->content.linkable.url)->data,
- format_str(ils->content.linkable.title)->data);
- print_inlines(ils->content.linkable.label, indent + 2);
- break;
- case image:
- printf("image url=%s title=%s\n",
- format_str(ils->content.linkable.url)->data,
- format_str(ils->content.linkable.title)->data);
- print_inlines(ils->content.linkable.label, indent + 2);
- break;
- case strong:
- printf("strong\n");
- print_inlines(ils->content.linkable.label, indent + 2);
- break;
- case emph:
- printf("emph\n");
- print_inlines(ils->content.linkable.label, indent + 2);
- break;
- }
- ils = ils->next;
- }
+ while(ils != NULL) {
+ for (int i=0; i < indent; i++) {
+ putchar(' ');
+ }
+ switch(ils->tag) {
+ case str:
+ printf("str ");
+ print_str(ils->content.literal.data, ils->content.literal.len);
+ putchar('\n');
+ break;
+ case linebreak:
+ printf("linebreak\n");
+ break;
+ case softbreak:
+ printf("softbreak\n");
+ break;
+ case code:
+ printf("code ");
+ print_str(ils->content.literal.data, ils->content.literal.len);
+ putchar('\n');
+ break;
+ case raw_html:
+ printf("html ");
+ print_str(ils->content.literal.data, ils->content.literal.len);
+ putchar('\n');
+ break;
+ case entity:
+ printf("entity ");
+ print_str(ils->content.literal.data, ils->content.literal.len);
+ putchar('\n');
+ break;
+ case link:
+ case image:
+ printf("%s url=", ils->tag == link ? "link" : "image");
+ print_str(ils->content.linkable.url, -1);
+ if (ils->content.linkable.title) {
+ printf(" title=");
+ print_str(ils->content.linkable.title, -1);
+ }
+ putchar('\n');
+ print_inlines(ils->content.linkable.label, indent + 2);
+ break;
+ case strong:
+ printf("strong\n");
+ print_inlines(ils->content.linkable.label, indent + 2);
+ break;
+ case emph:
+ printf("emph\n");
+ print_inlines(ils->content.linkable.label, indent + 2);
+ break;
+ }
+ ils = ils->next;
+ }
}
diff --git a/src/stmd.h b/src/stmd.h
index eb1b989..dc24235 100644
--- a/src/stmd.h
+++ b/src/stmd.h
@@ -105,19 +105,14 @@ extern block* add_child(block* parent,
int block_type, int start_line, int start_column);
void free_blocks(block* e);
-block *stmd_parse_document(const char *buffer, size_t len);
-
-// FOR NOW:
-void process_inlines(block* cur, reference** refmap);
-void incorporate_line(gh_buf *ln, int line_number, block** curptr);
-void finalize(block* b, int line_number);
+extern block *stmd_parse_document(const unsigned char *buffer, size_t len);
+extern block *stmd_parse_file(FILE *f);
void print_inlines(inl* ils, int indent);
void print_blocks(block* blk, int indent);
-/* TODO */
-// int blocks_to_html(block* b, bstring* result, bool tight);
-// int inlines_to_html(inl* b, bstring* result);
+void blocks_to_html(gh_buf *html, block *b, bool tight);
+void inlines_to_html(gh_buf *html, inl *b);
void utf8proc_case_fold(gh_buf *dest, const unsigned char *str, int len);
diff --git a/src/utf8.c b/src/utf8.c
index 1a5df9e..e3f8dd3 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -1,6 +1,8 @@
#include
-#include "bstrlib.h"
-#include "debug.h"
+#include
+#include
+
+#include "stmd.h"
static const int8_t utf8proc_utf8class[256] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
diff --git a/src/utf8.h b/src/utf8.h
deleted file mode 100644
index fe59a90..0000000
--- a/src/utf8.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#include
-#include "bstrlib.h"
-
-extern unsigned char * from_utf8(unsigned char * s, unsigned int *n);
-extern int to_utf8(unsigned int c, bstring dest);
-extern bstring case_fold(bstring source);
--
cgit v1.2.3
From 24248c0f1a6de6f229890c5c03aeff8738214fee Mon Sep 17 00:00:00 2001
From: Vicent Marti
Date: Tue, 2 Sep 2014 13:30:13 +0200
Subject: Rename inlines
---
src/inlines.c | 50 +++++++++++++++++++++++++-------------------------
src/print.c | 22 +++++++++++-----------
src/stmd.h | 4 ++--
3 files changed, 38 insertions(+), 38 deletions(-)
(limited to 'src/inlines.c')
diff --git a/src/inlines.c b/src/inlines.c
index 82c7219..b9ece0e 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -151,15 +151,15 @@ inline static inl* make_simple(int t)
}
// Macros for creating various kinds of inlines.
-#define make_str(s) make_literal(str, s)
-#define make_code(s) make_literal(code, s)
-#define make_raw_html(s) make_literal(raw_html, s)
-#define make_entity(s) make_literal(entity, s)
-#define make_linebreak() make_simple(linebreak)
-#define make_softbreak() make_simple(softbreak)
-#define make_link(label, url, title) make_linkable(link, label, url, title)
-#define make_emph(contents) make_inlines(emph, contents)
-#define make_strong(contents) make_inlines(strong, contents)
+#define make_str(s) make_literal(INL_STRING, s)
+#define make_code(s) make_literal(INL_CODE, s)
+#define make_raw_html(s) make_literal(INL_RAW_HTML, s)
+#define make_entity(s) make_literal(INL_ENTITY, s)
+#define make_linebreak() make_simple(INL_LINEBREAK)
+#define make_softbreak() make_simple(INL_SOFTBREAK)
+#define make_link(label, url, title) make_linkable(INL_LINK, label, url, title)
+#define make_emph(contents) make_inlines(INL_EMPH, contents)
+#define make_strong(contents) make_inlines(INL_STRONG, contents)
// Free an inline list.
extern void free_inlines(inl* e)
@@ -167,23 +167,23 @@ extern void free_inlines(inl* e)
inl * next;
while (e != NULL) {
switch (e->tag){
- case str:
- case raw_html:
- case code:
- case entity:
+ case INL_STRING:
+ case INL_RAW_HTML:
+ case INL_CODE:
+ case INL_ENTITY:
chunk_free(&e->content.literal);
break;
- case linebreak:
- case softbreak:
+ case INL_LINEBREAK:
+ case INL_SOFTBREAK:
break;
- case link:
- case image:
+ case INL_LINK:
+ case INL_IMAGE:
free(e->content.linkable.url);
free(e->content.linkable.title);
free_inlines(e->content.linkable.label);
break;
- case emph:
- case strong:
+ case INL_EMPH:
+ case INL_STRONG:
free_inlines(e->content.inlines);
break;
default:
@@ -454,7 +454,7 @@ static inl* handle_strong_emph(subject* subj, char c)
numdelims = scan_delims(subj, c, &can_open, &can_close);
if (numdelims >= 1 && can_close) {
subj->pos += 1;
- first_head->tag = emph;
+ first_head->tag = INL_EMPH;
chunk_free(&first_head->content.literal);
first_head->content.inlines = first_head->next;
first_head->next = NULL;
@@ -471,7 +471,7 @@ static inl* handle_strong_emph(subject* subj, char c)
numdelims = scan_delims(subj, c, &can_open, &can_close);
if (numdelims >= 2 && can_close) {
subj->pos += 2;
- first_head->tag = strong;
+ first_head->tag = INL_STRONG;
chunk_free(&first_head->content.literal);
first_head->content.inlines = first_head->next;
first_head->next = NULL;
@@ -502,10 +502,10 @@ static inl* handle_strong_emph(subject* subj, char c)
}
subj->pos += numdelims;
if (first_close) {
- first_head->tag = first_close_delims == 1 ? strong : emph;
+ first_head->tag = first_close_delims == 1 ? INL_STRONG : INL_EMPH;
chunk_free(&first_head->content.literal);
first_head->content.inlines =
- make_inlines(first_close_delims == 1 ? emph : strong,
+ make_inlines(first_close_delims == 1 ? INL_EMPH : INL_STRONG,
first_head->next);
il = first_head->next;
@@ -989,8 +989,8 @@ static int parse_inline(subject* subj, inl ** last)
advance(subj);
if (peek_char(subj) == '[') {
new = handle_left_bracket(subj);
- if (new != NULL && new->tag == link) {
- new->tag = image;
+ if (new != NULL && new->tag == INL_LINK) {
+ new->tag = INL_IMAGE;
} else {
new = append_inlines(make_str(chunk_literal("!")), new);
}
diff --git a/src/print.c b/src/print.c
index 3ebde16..0a87925 100644
--- a/src/print.c
+++ b/src/print.c
@@ -124,35 +124,35 @@ extern void print_inlines(inl* ils, int indent)
putchar(' ');
}
switch(ils->tag) {
- case str:
+ case INL_STRING:
printf("str ");
print_str(ils->content.literal.data, ils->content.literal.len);
putchar('\n');
break;
- case linebreak:
+ case INL_LINEBREAK:
printf("linebreak\n");
break;
- case softbreak:
+ case INL_SOFTBREAK:
printf("softbreak\n");
break;
- case code:
+ case INL_CODE:
printf("code ");
print_str(ils->content.literal.data, ils->content.literal.len);
putchar('\n');
break;
- case raw_html:
+ case INL_RAW_HTML:
printf("html ");
print_str(ils->content.literal.data, ils->content.literal.len);
putchar('\n');
break;
- case entity:
+ case INL_ENTITY:
printf("entity ");
print_str(ils->content.literal.data, ils->content.literal.len);
putchar('\n');
break;
- case link:
- case image:
- printf("%s url=", ils->tag == link ? "link" : "image");
+ case INL_LINK:
+ case INL_IMAGE:
+ printf("%s url=", ils->tag == INL_LINK ? "link" : "image");
print_str(ils->content.linkable.url, -1);
if (ils->content.linkable.title) {
printf(" title=");
@@ -161,11 +161,11 @@ extern void print_inlines(inl* ils, int indent)
putchar('\n');
print_inlines(ils->content.linkable.label, indent + 2);
break;
- case strong:
+ case INL_STRONG:
printf("strong\n");
print_inlines(ils->content.linkable.label, indent + 2);
break;
- case emph:
+ case INL_EMPH:
printf("emph\n");
print_inlines(ils->content.linkable.label, indent + 2);
break;
diff --git a/src/stmd.h b/src/stmd.h
index dc24235..1e490d6 100644
--- a/src/stmd.h
+++ b/src/stmd.h
@@ -12,8 +12,8 @@ typedef struct {
} chunk;
typedef struct Inline {
- enum { str, softbreak, linebreak, code, raw_html, entity,
- emph, strong, link, image } tag;
+ enum { INL_STRING, INL_SOFTBREAK, INL_LINEBREAK, INL_CODE, INL_RAW_HTML, INL_ENTITY,
+ INL_EMPH, INL_STRONG, INL_LINK, INL_IMAGE } tag;
union {
chunk literal;
struct Inline *inlines;
--
cgit v1.2.3
From 7e12fdba0c9a444a3cfc29c520e2f2caa57a8232 Mon Sep 17 00:00:00 2001
From: Vicent Marti
Date: Tue, 2 Sep 2014 14:15:24 +0200
Subject: NO SEGFAULTS KTHX
---
src/inlines.c | 9 ++++++---
1 file changed, 6 insertions(+), 3 deletions(-)
(limited to 'src/inlines.c')
diff --git a/src/inlines.c b/src/inlines.c
index b9ece0e..7b48ad9 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -84,7 +84,7 @@ extern reference* lookup_reference(reference** refmap, chunk *label)
if (refmap != NULL) {
HASH_FIND_STR(*refmap, (char*)norm, ref);
}
- free(label);
+ free(norm);
return ref;
}
@@ -262,7 +262,7 @@ inline static unsigned char *chunk_to_cstr(chunk *c)
inline static chunk chunk_literal(const char *data)
{
- chunk c = {data, strlen(data), 0};
+ chunk c = {data, data ? strlen(data) : 0, 0};
return c;
}
@@ -937,6 +937,7 @@ static int find_special_char(subject *subj)
while (n < size) {
if (strchr("\n\\`&_*[]<!", gh_buf_at(subj->buffer, n)))
return n;
+ n++;
}
return -1;
@@ -974,7 +975,9 @@ static int parse_inline(subject* subj, inl ** last)
case '_':
if (subj->pos > 0 && (isalnum(gh_buf_at(subj->buffer, subj->pos - 1)) ||
gh_buf_at(subj->buffer, subj->pos - 1) == '_')) {
- goto text_literal;
+ new = make_str(chunk_literal("_"));
+ advance(subj);
+ break;
}
new = handle_strong_emph(subj, '_');
--
cgit v1.2.3
From a7314deae649646f1f7ce5ede972641b5b62538c Mon Sep 17 00:00:00 2001
From: Vicent Marti
Date: Wed, 3 Sep 2014 03:40:23 +0200
Subject: 338/103
---
Makefile | 4 +-
src/blocks.c | 173 +++++++++++++++++-----------------
src/buffer.c | 26 ++++-
src/buffer.h | 2 +-
src/html/houdini_href_e.c | 10 +-
src/html/houdini_html_e.c | 10 +-
src/html/html.c | 4 +-
src/inlines.c | 235 ++++++++++++++++++----------------------------
src/print.c | 2 +-
src/scanners.h | 28 +++---
src/scanners.re | 85 +++++++----------
src/stmd.h | 16 ++--
12 files changed, 261 insertions(+), 334 deletions(-)
(limited to 'src/inlines.c')
diff --git a/Makefile b/Makefile
index d14a928..89ec68c 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
-CFLAGS=-ggdb3 -O0 -Wall -Wextra -std=c99 -Isrc $(OPTFLAGS)
-LDFLAGS=-ggdb3 -O0 -Wall -Werror
+CFLAGS=-ggdb3 -O0 -Wall -Wextra -Wno-unused-variable -std=c99 -Isrc $(OPTFLAGS)
+LDFLAGS=-ggdb3 -O0 -Wall -Wno-unused-variable # -Werror
SRCDIR=src
DATADIR=data
diff --git a/src/blocks.c b/src/blocks.c
index 42f20db..94ff986 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -8,6 +8,8 @@
#include "scanners.h"
#include "uthash.h"
+#define peek_at(i, n) (i)->data[n]
+
static void incorporate_line(gh_buf *ln, int line_number, block** curptr);
static void finalize(block* b, int line_number);
@@ -27,7 +29,6 @@ static block* make_block(int tag, int start_line, int start_column)
e->top = NULL;
e->attributes.refmap = NULL;
gh_buf_init(&e->string_content, 32);
- e->string_pos = 0;
e->inline_content = NULL;
e->next = NULL;
e->prev = NULL;
@@ -80,10 +81,10 @@ static inline bool accepts_lines(int block_type)
block_type == fenced_code);
}
-static void add_line(block* block, gh_buf *ln, int offset)
+static void add_line(block* block, chunk *ch, int offset)
{
assert(block->open);
- gh_buf_put(&block->string_content, ln->ptr + offset, ln->size - offset);
+ gh_buf_put(&block->string_content, ch->data + offset, ch->len - offset);
}
static void remove_trailing_blank_lines(gh_buf *ln)
@@ -104,7 +105,7 @@ static void remove_trailing_blank_lines(gh_buf *ln)
i = gh_buf_strchr(ln, '\n', i);
if (i >= 0)
- gh_buf_truncate(ln, i + 1);
+ gh_buf_truncate(ln, i);
}
// Check to see if a block ends with a blank line, descending
@@ -162,12 +163,12 @@ static void finalize(block* b, int line_number)
switch (b->tag) {
case paragraph:
pos = 0;
- while (gh_buf_at(&b->string_content, b->string_pos) == '[' &&
- (pos = parse_reference(&b->string_content, b->string_pos,
- b->top->attributes.refmap))) {
- b->string_pos = pos;
+ while (gh_buf_at(&b->string_content, 0) == '[' &&
+ (pos = parse_reference(&b->string_content, b->top->attributes.refmap))) {
+
+ gh_buf_drop(&b->string_content, pos);
}
- if (is_blank(&b->string_content, b->string_pos)) {
+ if (is_blank(&b->string_content, 0)) {
b->tag = reference_def;
}
break;
@@ -179,14 +180,16 @@ static void finalize(block* b, int line_number)
case fenced_code:
// first line of contents becomes info
- firstlinelen = gh_buf_strchr(&b->string_content, '\n', b->string_pos);
+ firstlinelen = gh_buf_strchr(&b->string_content, '\n', 0);
+
+ gh_buf_init(&b->attributes.fenced_code_data.info, 0);
gh_buf_set(
&b->attributes.fenced_code_data.info,
- b->string_content.ptr + b->string_pos,
+ b->string_content.ptr,
firstlinelen
);
- b->string_pos = firstlinelen + 1;
+ gh_buf_drop(&b->string_content, firstlinelen + 1);
gh_buf_trim(&b->attributes.fenced_code_data.info);
unescape_buffer(&b->attributes.fenced_code_data.info);
@@ -281,7 +284,7 @@ void process_inlines(block* cur, reference** refmap)
case paragraph:
case atx_header:
case setext_header:
- cur->inline_content = parse_inlines(&cur->string_content, cur->string_pos, refmap);
+ cur->inline_content = parse_inlines(&cur->string_content, refmap);
// MEM
// gh_buf_free(&cur->string_content);
break;
@@ -300,19 +303,18 @@ void process_inlines(block* cur, reference** refmap)
// Attempts to parse a list item marker (bullet or enumerated).
// On success, returns length of the marker, and populates
// data with the details. On failure, returns 0.
-static int parse_list_marker(gh_buf *ln, int pos,
- struct ListData ** dataptr)
+static int parse_list_marker(chunk *input, int pos, struct ListData ** dataptr)
{
- char c;
+ unsigned char c;
int startpos;
struct ListData * data;
startpos = pos;
- c = gh_buf_at(ln, pos);
+ c = peek_at(input, pos);
- if ((c == '*' || c == '-' || c == '+') && !scan_hrule(ln, pos)) {
+ if ((c == '*' || c == '-' || c == '+') && !scan_hrule(input, pos)) {
pos++;
- if (!isspace(gh_buf_at(ln, pos))) {
+ if (!isspace(peek_at(input, pos))) {
return 0;
}
data = malloc(sizeof(struct ListData));
@@ -327,14 +329,14 @@ static int parse_list_marker(gh_buf *ln, int pos,
int start = 0;
do {
- start = (10 * start) + (gh_buf_at(ln, pos) - '0');
+ start = (10 * start) + (peek_at(input, pos) - '0');
pos++;
- } while (isdigit(gh_buf_at(ln, pos)));
+ } while (isdigit(peek_at(input, pos)));
- c = gh_buf_at(ln, pos);
+ c = peek_at(input, pos);
if (c == '.' || c == ')') {
pos++;
- if (!isspace(gh_buf_at(ln, pos))) {
+ if (!isspace(peek_at(input, pos))) {
return 0;
}
data = malloc(sizeof(struct ListData));
@@ -449,8 +451,26 @@ extern block *stmd_parse_document(const unsigned char *buffer, size_t len)
return finalize_document(document, linenum);
}
+static void chop_trailing_hashtags(chunk *ch)
+{
+ int n;
+
+ chunk_rtrim(ch);
+ n = ch->len - 1;
+
+ // if string ends in #s, remove these:
+ while (n >= 0 && peek_at(ch, n) == '#')
+ n--;
+
+ // the last # was escaped, so we include it.
+ if (n >= 0 && peek_at(ch, n) == '\\')
+ n++;
+
+ ch->len = n + 1;
+}
+
// Process one line at a time, modifying a block.
-static void incorporate_line(gh_buf *ln, int line_number, block** curptr)
+static void incorporate_line(gh_buf *line, int line_number, block** curptr)
{
block* last_matched_container;
int offset = 0;
@@ -464,6 +484,10 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr)
bool blank = false;
int first_nonspace;
int indent;
+ chunk input;
+
+ input.data = line->ptr;
+ input.len = line->size;
// container starts at the document root.
container = cur->top;
@@ -475,21 +499,19 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr)
container = container->last_child;
first_nonspace = offset;
- while (gh_buf_at(ln, first_nonspace) == ' ') {
+ while (peek_at(&input, first_nonspace) == ' ') {
first_nonspace++;
}
indent = first_nonspace - offset;
- blank = gh_buf_at(ln, first_nonspace) == '\n';
+ blank = peek_at(&input, first_nonspace) == '\n';
if (container->tag == block_quote) {
-
- matched = indent <= 3 && gh_buf_at(ln, first_nonspace) == '>';
+ matched = indent <= 3 && peek_at(&input, first_nonspace) == '>';
if (matched) {
offset = first_nonspace + 1;
- if (gh_buf_at(ln, offset) == ' ') {
+ if (peek_at(&input, offset) == ' ')
offset++;
- }
} else {
all_matched = false;
}
@@ -526,7 +548,7 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr)
// skip optional spaces of fence offset
i = container->attributes.fenced_code_data.fence_offset;
- while (i > 0 && gh_buf_at(ln, offset) == ' ') {
+ while (i > 0 && peek_at(&input, offset) == ' ') {
offset++;
i--;
}
@@ -564,15 +586,13 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr)
container->tag != html_block) {
first_nonspace = offset;
- while (gh_buf_at(ln, first_nonspace) == ' ') {
+ while (peek_at(&input, first_nonspace) == ' ')
first_nonspace++;
- }
indent = first_nonspace - offset;
- blank = gh_buf_at(ln, first_nonspace) == '\n';
+ blank = peek_at(&input, first_nonspace) == '\n';
if (indent >= CODE_INDENT) {
-
if (cur->tag != paragraph && !blank) {
offset += CODE_INDENT;
container = add_child(container, indented_code, line_number, offset + 1);
@@ -580,76 +600,70 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr)
break;
}
- } else if (gh_buf_at(ln, first_nonspace) == '>') {
+ } else if (peek_at(&input, first_nonspace) == '>') {
offset = first_nonspace + 1;
// optional following character
- if (gh_buf_at(ln, offset) == ' ') {
+ if (peek_at(&input, offset) == ' ')
offset++;
- }
container = add_child(container, block_quote, line_number, offset + 1);
- } else if ((matched = scan_atx_header_start(ln, first_nonspace))) {
+ } else if ((matched = scan_atx_header_start(&input, first_nonspace))) {
offset = first_nonspace + matched;
container = add_child(container, atx_header, line_number, offset + 1);
- int hashpos = gh_buf_strchr(ln, '#', first_nonspace);
- assert(hashpos >= 0);
-
+ int hashpos = chunk_strchr(&input, '#', first_nonspace);
int level = 0;
- while (gh_buf_at(ln, hashpos) == '#') {
+
+ while (peek_at(&input, hashpos) == '#') {
level++;
hashpos++;
}
container->attributes.header_level = level;
- } else if ((matched = scan_open_code_fence(ln, first_nonspace))) {
+ } else if ((matched = scan_open_code_fence(&input, first_nonspace))) {
- container = add_child(container, fenced_code, line_number,
- first_nonspace + 1);
- container->attributes.fenced_code_data.fence_char = gh_buf_at(ln,
- first_nonspace);
+ container = add_child(container, fenced_code, line_number, first_nonspace + 1);
+ container->attributes.fenced_code_data.fence_char = peek_at(&input, first_nonspace);
container->attributes.fenced_code_data.fence_length = matched;
- container->attributes.fenced_code_data.fence_offset =
- first_nonspace - offset;
+ container->attributes.fenced_code_data.fence_offset = first_nonspace - offset;
offset = first_nonspace + matched;
- } else if ((matched = scan_html_block_tag(ln, first_nonspace))) {
+ } else if ((matched = scan_html_block_tag(&input, first_nonspace))) {
- container = add_child(container, html_block, line_number,
- first_nonspace + 1);
+ container = add_child(container, html_block, line_number, first_nonspace + 1);
// note, we don't adjust offset because the tag is part of the text
} else if (container->tag == paragraph &&
- (lev = scan_setext_header_line(ln, first_nonspace)) &&
+ (lev = scan_setext_header_line(&input, first_nonspace)) &&
// check that there is only one line in the paragraph:
gh_buf_strrchr(&container->string_content, '\n',
gh_buf_len(&container->string_content) - 2) < 0) {
container->tag = setext_header;
container->attributes.header_level = lev;
- offset = gh_buf_len(ln) - 1;
+ offset = input.len - 1;
} else if (!(container->tag == paragraph && !all_matched) &&
- (matched = scan_hrule(ln, first_nonspace))) {
+ (matched = scan_hrule(&input, first_nonspace))) {
// it's only now that we know the line is not part of a setext header:
container = add_child(container, hrule, line_number, first_nonspace + 1);
finalize(container, line_number);
container = container->parent;
- offset = gh_buf_len(ln) - 1;
+ offset = input.len - 1;
- } else if ((matched = parse_list_marker(ln, first_nonspace, &data))) {
+ } else if ((matched = parse_list_marker(&input, first_nonspace, &data))) {
// compute padding:
offset = first_nonspace + matched;
i = 0;
- while (i <= 5 && gh_buf_at(ln, offset + i) == ' ') {
+ while (i <= 5 && peek_at(&input, offset + i) == ' ') {
i++;
}
// i = number of spaces after marker, up to 5
- if (i >= 5 || i < 1 || gh_buf_at(ln, offset) == '\n') {
+ if (i >= 5 || i < 1 || peek_at(&input, offset) == '\n') {
data->padding = matched + 1;
if (i > 0) {
offset += 1;
@@ -674,6 +688,7 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr)
// add the list item
container = add_child(container, list_item, line_number,
first_nonspace + 1);
+ /* TODO: static */
container->attributes.list_data = *data;
free(data);
@@ -691,12 +706,11 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr)
// appropriate container.
first_nonspace = offset;
- while (gh_buf_at(ln, first_nonspace) == ' ') {
+ while (peek_at(&input, first_nonspace) == ' ')
first_nonspace++;
- }
indent = first_nonspace - offset;
- blank = gh_buf_at(ln, first_nonspace) == '\n';
+ blank = peek_at(&input, first_nonspace) == '\n';
// block quote lines are never blank as they start with >
// and we don't count blanks in fenced code for purposes of tight/loose
@@ -721,13 +735,12 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr)
cur->tag == paragraph &&
gh_buf_len(&cur->string_content) > 0) {
- add_line(cur, ln, offset);
+ add_line(cur, &input, offset);
} else { // not a lazy continuation
// finalize any blocks that were not matched and set cur to container:
while (cur != last_matched_container) {
-
finalize(cur, line_number);
cur = cur->parent;
assert(cur != NULL);
@@ -735,58 +748,46 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr)
if (container->tag == indented_code) {
- add_line(container, ln, offset);
+ add_line(container, &input, offset);
} else if (container->tag == fenced_code) {
matched = (indent <= 3
- && gh_buf_at(ln, first_nonspace) == container->attributes.fenced_code_data.fence_char)
- && scan_close_code_fence(ln, first_nonspace,
+ && peek_at(&input, first_nonspace) == container->attributes.fenced_code_data.fence_char)
+ && scan_close_code_fence(&input, first_nonspace,
container->attributes.fenced_code_data.fence_length);
if (matched) {
// if closing fence, don't add line to container; instead, close it:
finalize(container, line_number);
container = container->parent; // back up to parent
} else {
- add_line(container, ln, offset);
+ add_line(container, &input, offset);
}
} else if (container->tag == html_block) {
- add_line(container, ln, offset);
+ add_line(container, &input, offset);
} else if (blank) {
// ??? do nothing
} else if (container->tag == atx_header) {
- // chop off trailing ###s...use a scanner?
- gh_buf_trim(ln);
- int p = gh_buf_len(ln) - 1;
-
- // if string ends in #s, remove these:
- while (gh_buf_at(ln, p) == '#') {
- p--;
- }
- if (gh_buf_at(ln, p) == '\\') {
- // the last # was escaped, so we include it.
- p++;
- }
- gh_buf_truncate(ln, p + 1);
- add_line(container, ln, first_nonspace);
+ chop_trailing_hashtags(&input);
+ add_line(container, &input, first_nonspace);
finalize(container, line_number);
container = container->parent;
} else if (accepts_lines(container->tag)) {
- add_line(container, ln, first_nonspace);
+ add_line(container, &input, first_nonspace);
} else if (container->tag != hrule && container->tag != setext_header) {
// create paragraph container for line
container = add_child(container, paragraph, line_number, first_nonspace + 1);
- add_line(container, ln, first_nonspace);
+ add_line(container, &input, first_nonspace);
} else {
assert(false);
diff --git a/src/buffer.c b/src/buffer.c
index cfc6a7e..dc4a405 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -95,7 +95,7 @@ void gh_buf_clear(gh_buf *buf)
int gh_buf_set(gh_buf *buf, const unsigned char *data, int len)
{
- if (len == 0 || data == NULL) {
+ if (len <= 0 || data == NULL) {
gh_buf_clear(buf);
} else {
if (data != buf->ptr) {
@@ -125,6 +125,9 @@ int gh_buf_putc(gh_buf *buf, int c)
int gh_buf_put(gh_buf *buf, const unsigned char *data, int len)
{
+ if (len <= 0)
+ return 0;
+
ENSURE_SIZE(buf, buf->size + len + 1);
memmove(buf->ptr + buf->size, data, len);
buf->size += len;
@@ -272,15 +275,28 @@ void gh_buf_truncate(gh_buf *buf, int len)
}
}
+void gh_buf_drop(gh_buf *buf, int n)
+{
+ if (n > 0) {
+ buf->size = buf->size - n;
+ if (buf->size)
+ memmove(buf->ptr, buf->ptr + n, buf->size);
+
+ buf->ptr[buf->size] = '\0';
+ }
+}
+
void gh_buf_trim(gh_buf *buf)
{
- /* TODO: leading whitespace? */
- /*
+ int i = 0;
+
+ if (!buf->size)
+ return;
+
while (i < buf->size && isspace(buf->ptr[i]))
i++;
- gh_buf_truncate(buf, i);
- */
+ gh_buf_drop(buf, i);
/* rtrim */
while (buf->size > 0) {
diff --git a/src/buffer.h b/src/buffer.h
index 422ef02..0d5143e 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -105,8 +105,8 @@ extern void gh_buf_clear(gh_buf *buf);
int gh_buf_strchr(const gh_buf *buf, int c, int pos);
int gh_buf_strrchr(const gh_buf *buf, int c, int pos);
+void gh_buf_drop(gh_buf *buf, int n);
void gh_buf_truncate(gh_buf *buf, int len);
-void gh_buf_ltruncate(gh_buf *buf, int len);
void gh_buf_trim(gh_buf *buf);
#endif
diff --git a/src/html/houdini_href_e.c b/src/html/houdini_href_e.c
index 59fe850..b2a7d79 100644
--- a/src/html/houdini_href_e.c
+++ b/src/html/houdini_href_e.c
@@ -62,16 +62,8 @@ houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size)
while (i < size && HREF_SAFE[src[i]] != 0)
i++;
- if (likely(i > org)) {
- if (unlikely(org == 0)) {
- if (i >= size)
- return 0;
-
- gh_buf_grow(ob, HOUDINI_ESCAPED_SIZE(size));
- }
-
+ if (likely(i > org))
gh_buf_put(ob, src + org, i - org);
- }
/* escaping */
if (i >= size)
diff --git a/src/html/houdini_html_e.c b/src/html/houdini_html_e.c
index 316c5ce..5cdd3dd 100644
--- a/src/html/houdini_html_e.c
+++ b/src/html/houdini_html_e.c
@@ -54,16 +54,8 @@ houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure)
while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
i++;
- if (i > org) {
- if (unlikely(org == 0)) {
- if (i >= size)
- return 0;
-
- gh_buf_grow(ob, HOUDINI_ESCAPED_SIZE(size));
- }
-
+ if (i > org)
gh_buf_put(ob, src + org, i - org);
- }
/* escaping */
if (unlikely(i >= size))
diff --git a/src/html/html.c b/src/html/html.c
index 2f160ca..27ffe58 100644
--- a/src/html/html.c
+++ b/src/html/html.c
@@ -68,7 +68,7 @@ void blocks_to_html(gh_buf *html, block *b, bool tight)
cr(html);
gh_buf_puts(html, "<li>");
blocks_to_html(html, b->children, tight);
- gh_buf_trim(html);
+ gh_buf_trim(html); /* TODO: rtrim */
gh_buf_puts(html, "</li>");
cr(html);
break;
@@ -106,7 +106,7 @@ void blocks_to_html(gh_buf *html, block *b, bool tight)
cr(html);
gh_buf_puts(html, "");
escape_html(html, b->string_content.ptr, b->string_content.size);
- gh_buf_puts(html, "
");
+ gh_buf_puts(html, "");
cr(html);
break;
diff --git a/src/inlines.c b/src/inlines.c
index 7b48ad9..ef27a24 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -9,10 +9,10 @@
#include "scanners.h"
typedef struct Subject {
- const gh_buf *buffer;
- int pos;
- reference** reference_map;
- int label_nestlevel;
+ chunk input;
+ int pos;
+ int label_nestlevel;
+ reference** reference_map;
} subject;
reference* lookup_reference(reference** refmap, chunk *label);
@@ -27,12 +27,16 @@ inline static void chunk_trim(chunk *c);
inline static chunk chunk_literal(const char *data);
inline static chunk chunk_buf_detach(gh_buf *buf);
-inline static chunk chunk_buf(const gh_buf *buf, int pos, int len);
+inline static chunk chunk_dup(const chunk *ch, int pos, int len);
static inl *parse_chunk_inlines(chunk *chunk, reference** refmap);
static inl *parse_inlines_while(subject* subj, int (*f)(subject*));
static int parse_inline(subject* subj, inl ** last);
+static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap);
+static void subject_from_buf(subject *e, gh_buf *buffer, reference** refmap);
+static int subject_find_special_char(subject *subj);
+
extern void free_reference(reference *ref) {
free(ref->label);
free(ref->url);
@@ -101,10 +105,12 @@ extern reference* make_reference(chunk *label, chunk *url, chunk *title)
extern void add_reference(reference** refmap, reference* ref)
{
reference * t = NULL;
- HASH_FIND(hh, *refmap, (char*)ref->label, (unsigned)strlen(ref->label), t);
+ const char *label = (const char *)ref->label;
+
+ HASH_FIND(hh, *refmap, label, strlen(label), t);
if (t == NULL) {
- HASH_ADD_KEYPTR(hh, *refmap, (char*)ref->label, (unsigned)strlen(ref->label), ref);
+ HASH_ADD_KEYPTR(hh, *refmap, label, strlen(label), ref);
} else {
free_reference(ref); // we free this now since it won't be in the refmap
}
@@ -210,87 +216,49 @@ inline static inl* append_inlines(inl* a, inl* b)
return a;
}
-// Make a 'subject' from an input string.
-static void init_subject(subject *e, gh_buf *buffer, int input_pos, reference** refmap)
+static void subject_from_buf(subject *e, gh_buf *buffer, reference** refmap)
{
- e->buffer = buffer;
- e->pos = input_pos;
+ e->input.data = buffer->ptr;
+ e->input.len = buffer->size;
+ e->input.alloc = 0;
+ e->pos = 0;
e->label_nestlevel = 0;
e->reference_map = refmap;
-}
-
-inline static int isbacktick(int c)
-{
- return (c == '`');
-}
-
-inline static void chunk_free(chunk *c)
-{
- if (c->alloc)
- free((char *)c->data);
-
- c->data = NULL;
- c->alloc = 0;
- c->len = 0;
-}
-
-inline static void chunk_trim(chunk *c)
-{
- while (c->len && isspace(c->data[0])) {
- c->data++;
- c->len--;
- }
-
- while (c->len > 0) {
- if (!isspace(c->data[c->len - 1]))
- break;
- c->len--;
- }
+ chunk_rtrim(&e->input);
}
-inline static unsigned char *chunk_to_cstr(chunk *c)
+static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap)
{
- unsigned char *str;
-
- str = malloc(c->len + 1);
- memcpy(str, c->data, c->len);
- str[c->len] = 0;
+ e->input.data = chunk->data;
+ e->input.len = chunk->len;
+ e->input.alloc = 0;
+ e->pos = 0;
+ e->label_nestlevel = 0;
+ e->reference_map = refmap;
- return str;
+ chunk_rtrim(&e->input);
}
-inline static chunk chunk_literal(const char *data)
+inline static int isbacktick(int c)
{
- chunk c = {data, data ? strlen(data) : 0, 0};
- return c;
+ return (c == '`');
}
-inline static chunk chunk_buf(const gh_buf *buf, int pos, int len)
+static inline unsigned char peek_char(subject *subj)
{
- chunk c = {buf->ptr + pos, len, 0};
- return c;
+ return (subj->pos < subj->input.len) ? subj->input.data[subj->pos] : 0;
}
-inline static chunk chunk_buf_detach(gh_buf *buf)
+static inline unsigned char peek_at(subject *subj, int pos)
{
- chunk c;
-
- c.len = buf->size;
- c.data = gh_buf_detach(buf);
- c.alloc = 1;
-
- return c;
+ return subj->input.data[pos];
}
-// Return the next character in the subject, without advancing.
-// Return 0 if at the end of the subject.
-#define peek_char(subj) gh_buf_at((subj)->buffer, (subj)->pos)
-
// Return true if there are more characters in the subject.
inline static int is_eof(subject* subj)
{
- return (subj->pos >= gh_buf_len(subj->buffer));
+ return (subj->pos >= subj->input.len);
}
// Advance the subject. Doesn't check for eof.
@@ -308,7 +276,7 @@ inline static chunk take_while(subject* subj, int (*f)(int))
len++;
}
- return chunk_buf(subj->buffer, startpos, len);
+ return chunk_dup(&subj->input, startpos, len);
}
// Try to process a backtick code span that began with a
@@ -388,7 +356,7 @@ static inl* handle_backticks(subject *subj)
} else {
gh_buf buf = GH_BUF_INIT;
- gh_buf_set(&buf, subj->buffer->ptr + startpos, endpos - startpos - openticks.len);
+ gh_buf_set(&buf, subj->input.data + startpos, endpos - startpos - openticks.len);
gh_buf_trim(&buf);
normalize_whitespace(&buf);
@@ -404,7 +372,7 @@ static int scan_delims(subject* subj, char c, bool * can_open, bool * can_close)
char char_before, char_after;
int startpos = subj->pos;
- char_before = subj->pos == 0 ? '\n' : gh_buf_at(subj->buffer, subj->pos - 1);
+ char_before = subj->pos == 0 ? '\n' : peek_at(subj, subj->pos - 1);
while (peek_char(subj) == c) {
numdelims++;
advance(subj);
@@ -439,7 +407,7 @@ static inl* handle_strong_emph(subject* subj, char c)
numdelims = scan_delims(subj, c, &can_open, &can_close);
subj->pos += numdelims;
- new = make_str(chunk_buf(subj->buffer, subj->pos - numdelims, numdelims));
+ new = make_str(chunk_dup(&subj->input, subj->pos - numdelims, numdelims));
*last = new;
first_head = new;
result = new;
@@ -488,7 +456,7 @@ static inl* handle_strong_emph(subject* subj, char c)
numdelims = scan_delims(subj, c, &can_open, &can_close);
if (can_close && numdelims >= 1 && numdelims <= 3 &&
numdelims != first_close_delims) {
- new = make_str(chunk_buf(subj->buffer, subj->pos, numdelims));
+ new = make_str(chunk_dup(&subj->input, subj->pos, numdelims));
append_inlines(*last, new);
*last = new;
if (first_close_delims == 1 && numdelims > 2) {
@@ -554,7 +522,7 @@ static inl* handle_backslash(subject *subj)
unsigned char nextchar = peek_char(subj);
if (ispunct(nextchar)) { // only ascii symbols and newline can be escaped
advance(subj);
- return make_str(chunk_buf(subj->buffer, subj->pos - 1, 1));
+ return make_str(chunk_dup(&subj->input, subj->pos - 1, 1));
} else if (nextchar == '\n') {
advance(subj);
return make_linebreak();
@@ -569,9 +537,9 @@ static inl* handle_entity(subject* subj)
{
int match;
inl *result;
- match = scan_entity(subj->buffer, subj->pos);
+ match = scan_entity(&subj->input, subj->pos);
if (match) {
- result = make_entity(chunk_buf(subj->buffer, subj->pos, match));
+ result = make_entity(chunk_dup(&subj->input, subj->pos, match));
subj->pos += match;
} else {
advance(subj);
@@ -584,15 +552,13 @@ static inl* handle_entity(subject* subj)
// Returns an inline sequence consisting of str and entity elements.
static inl *make_str_with_entities(chunk *content)
{
- inl * result = NULL;
- inl * new;
+ inl *result = NULL;
+ inl *new;
int searchpos;
char c;
subject subj;
- gh_buf content_buf = GH_BUF_INIT;
- gh_buf_set(&content_buf, content->data, content->len);
- init_subject(&subj, &content_buf, 0, NULL);
+ subject_from_chunk(&subj, content, NULL);
while ((c = peek_char(&subj))) {
switch (c) {
@@ -600,18 +566,13 @@ static inl *make_str_with_entities(chunk *content)
new = handle_entity(&subj);
break;
default:
- searchpos = gh_buf_strchr(subj.buffer, '&', subj.pos);
- if (searchpos < 0) {
- searchpos = gh_buf_len(subj.buffer);
- }
-
- new = make_str(chunk_buf(subj.buffer, subj.pos, searchpos - subj.pos));
+ searchpos = chunk_strchr(&subj.input, '&', subj.pos);
+ new = make_str(chunk_dup(&subj.input, subj.pos, searchpos - subj.pos));
subj.pos = searchpos;
}
result = append_inlines(result, new);
}
- gh_buf_free(&content_buf);
return result;
}
@@ -678,9 +639,9 @@ static inl* handle_pointy_brace(subject* subj)
advance(subj); // advance past first <
// first try to match a URL autolink
- matchlen = scan_autolink_uri(subj->buffer, subj->pos);
+ matchlen = scan_autolink_uri(&subj->input, subj->pos);
if (matchlen > 0) {
- contents = chunk_buf(subj->buffer, subj->pos, matchlen - 1);
+ contents = chunk_dup(&subj->input, subj->pos, matchlen - 1);
subj->pos += matchlen;
return make_link(
@@ -691,11 +652,11 @@ static inl* handle_pointy_brace(subject* subj)
}
// next try to match an email autolink
- matchlen = scan_autolink_email(subj->buffer, subj->pos);
+ matchlen = scan_autolink_email(&subj->input, subj->pos);
if (matchlen > 0) {
gh_buf mail_url = GH_BUF_INIT;
- contents = chunk_buf(subj->buffer, subj->pos, matchlen - 1);
+ contents = chunk_dup(&subj->input, subj->pos, matchlen - 1);
subj->pos += matchlen;
gh_buf_puts(&mail_url, "mailto:");
@@ -709,9 +670,9 @@ static inl* handle_pointy_brace(subject* subj)
}
// finally, try to match an html tag
- matchlen = scan_html_tag(subj->buffer, subj->pos);
+ matchlen = scan_html_tag(&subj->input, subj->pos);
if (matchlen > 0) {
- contents = chunk_buf(subj->buffer, subj->pos - 1, matchlen + 1);
+ contents = chunk_dup(&subj->input, subj->pos - 1, matchlen + 1);
subj->pos += matchlen;
return make_raw_html(contents);
}
@@ -776,12 +737,7 @@ static int link_label(subject* subj, chunk *raw_label)
}
}
if (c == ']') {
- *raw_label = chunk_buf(
- subj->buffer,
- startpos + 1,
- subj->pos - (startpos + 1)
- );
-
+ *raw_label = chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1));
subj->label_nestlevel = 0;
advance(subj); // advance past ]
return 1;
@@ -813,25 +769,25 @@ static inl* handle_left_bracket(subject* subj)
if (found_label) {
if (peek_char(subj) == '(' &&
- ((sps = scan_spacechars(subj->buffer, subj->pos + 1)) > -1) &&
- ((n = scan_link_url(subj->buffer, subj->pos + 1 + sps)) > -1)) {
+ ((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) &&
+ ((n = scan_link_url(&subj->input, subj->pos + 1 + sps)) > -1)) {
// try to parse an explicit link:
starturl = subj->pos + 1 + sps; // after (
endurl = starturl + n;
- starttitle = endurl + scan_spacechars(subj->buffer, endurl);
+ starttitle = endurl + scan_spacechars(&subj->input, endurl);
// ensure there are spaces btw url and title
endtitle = (starttitle == endurl) ? starttitle :
- starttitle + scan_link_title(subj->buffer, starttitle);
+ starttitle + scan_link_title(&subj->input, starttitle);
- endall = endtitle + scan_spacechars(subj->buffer, endtitle);
+ endall = endtitle + scan_spacechars(&subj->input, endtitle);
- if (gh_buf_at(subj->buffer, endall) == ')') {
+ if (peek_at(subj, endall) == ')') {
subj->pos = endall + 1;
- url = chunk_buf(subj->buffer, starturl, endurl - starturl);
- title = chunk_buf(subj->buffer, starttitle, endtitle - starttitle);
+ url = chunk_dup(&subj->input, starturl, endurl - starturl);
+ title = chunk_dup(&subj->input, starttitle, endtitle - starttitle);
lab = parse_chunk_inlines(&rawlabel, NULL);
return make_link(lab, url, title);
@@ -850,7 +806,7 @@ static inl* handle_left_bracket(subject* subj)
// Check for reference link.
// First, see if there's another label:
- subj->pos = subj->pos + scan_spacechars(subj->buffer, endlabel);
+ subj->pos = subj->pos + scan_spacechars(&subj->input, endlabel);
reflabel = rawlabel;
// if followed by a nonempty link label, we change reflabel to it:
@@ -892,8 +848,8 @@ static inl* handle_newline(subject *subj)
advance(subj);
}
if (nlpos > 1 &&
- gh_buf_at(subj->buffer, nlpos - 1) == ' ' &&
- gh_buf_at(subj->buffer, nlpos - 2) == ' ') {
+ peek_at(subj, nlpos - 1) == ' ' &&
+ peek_at(subj, nlpos - 2) == ' ') {
return make_linebreak();
} else {
return make_softbreak();
@@ -917,30 +873,22 @@ extern inl* parse_inlines_while(subject* subj, int (*f)(subject*))
inl *parse_chunk_inlines(chunk *chunk, reference** refmap)
{
- inl *result;
subject subj;
- gh_buf full_chunk = GH_BUF_INIT;
-
- gh_buf_set(&full_chunk, chunk->data, chunk->len);
- init_subject(&subj, &full_chunk, 0, refmap);
- result = parse_inlines_while(&subj, not_eof);
-
- gh_buf_free(&full_chunk);
- return result;
+ subject_from_chunk(&subj, chunk, refmap);
+ return parse_inlines_while(&subj, not_eof);
}
-static int find_special_char(subject *subj)
+static int subject_find_special_char(subject *subj)
{
int n = subj->pos + 1;
- int size = (int)gh_buf_len(subj->buffer);
- while (n < size) {
- if (strchr("\n\\`&_*[]<!", gh_buf_at(subj->buffer, n)))
+ while (n < subj->input.len) {
+ if (strchr("\n\\`&_*[]<!", subj->input.data[n]))
return n;
n++;
}
- return -1;
+ return subj->input.len;
}
// Parse an inline, advancing subject, and add it to last element.
@@ -973,11 +921,13 @@ static int parse_inline(subject* subj, inl ** last)
new = handle_pointy_brace(subj);
break;
case '_':
- if (subj->pos > 0 && (isalnum(gh_buf_at(subj->buffer, subj->pos - 1)) ||
- gh_buf_at(subj->buffer, subj->pos - 1) == '_')) {
- new = make_str(chunk_literal("_"));
- advance(subj);
- break;
+ if (subj->pos > 0) {
+ unsigned char prev = peek_at(subj, subj->pos - 1);
+ if (isalnum(prev) || prev == '_') {
+ new = make_str(chunk_literal("_"));
+ advance(subj);
+ break;
+ }
}
new = handle_strong_emph(subj, '_');
@@ -1002,18 +952,13 @@ static int parse_inline(subject* subj, inl ** last)
}
break;
default:
- text_literal:
- endpos = find_special_char(subj);
- if (endpos < 0) {
- endpos = gh_buf_len(subj->buffer);
- }
-
- contents = chunk_buf(subj->buffer, subj->pos, endpos - subj->pos);
+ endpos = subject_find_special_char(subj);
+ contents = chunk_dup(&subj->input, subj->pos, endpos - subj->pos);
subj->pos = endpos;
// if we're at a newline, strip trailing spaces.
if (peek_char(subj) == '\n') {
- chunk_trim(&contents);
+ chunk_rtrim(&contents);
}
new = make_str(contents);
@@ -1026,10 +971,10 @@ static int parse_inline(subject* subj, inl ** last)
return 1;
}
-extern inl* parse_inlines(gh_buf *input, int input_pos, reference** refmap)
+extern inl* parse_inlines(gh_buf *input, reference** refmap)
{
subject subj;
- init_subject(&subj, input, input_pos, refmap);
+ subject_from_buf(&subj, input, refmap);
return parse_inlines_while(&subj, not_eof);
}
@@ -1048,7 +993,7 @@ void spnl(subject* subj)
// Modify refmap if a reference is encountered.
// Return 0 if no reference found, otherwise position of subject
// after reference is parsed.
-extern int parse_reference(gh_buf *input, int input_pos, reference** refmap)
+extern int parse_reference(gh_buf *input, reference** refmap)
{
subject subj;
@@ -1058,9 +1003,9 @@ extern int parse_reference(gh_buf *input, int input_pos, reference** refmap)
int matchlen = 0;
int beforetitle;
- reference * new = NULL;
+ reference *new = NULL;
- init_subject(&subj, input, input_pos, NULL);
+ subject_from_buf(&subj, input, NULL);
// parse label:
if (!link_label(&subj, &lab))
@@ -1075,9 +1020,9 @@ extern int parse_reference(gh_buf *input, int input_pos, reference** refmap)
// parse link url:
spnl(&subj);
- matchlen = scan_link_url(subj.buffer, subj.pos);
+ matchlen = scan_link_url(&subj.input, subj.pos);
if (matchlen) {
- url = chunk_buf(subj.buffer, subj.pos, matchlen);
+ url = chunk_dup(&subj.input, subj.pos, matchlen);
subj.pos += matchlen;
} else {
return 0;
@@ -1086,9 +1031,9 @@ extern int parse_reference(gh_buf *input, int input_pos, reference** refmap)
// parse optional link_title
beforetitle = subj.pos;
spnl(&subj);
- matchlen = scan_link_title(subj.buffer, subj.pos);
+ matchlen = scan_link_title(&subj.input, subj.pos);
if (matchlen) {
- title = chunk_buf(subj.buffer, subj.pos, matchlen);
+ title = chunk_dup(&subj.input, subj.pos, matchlen);
subj.pos += matchlen;
} else {
subj.pos = beforetitle;
diff --git a/src/print.c b/src/print.c
index 0a87925..c262995 100644
--- a/src/print.c
+++ b/src/print.c
@@ -9,7 +9,7 @@ static void print_str(const unsigned char *s, int len)
int i;
if (len < 0)
- len = strlen(s);
+ len = strlen((char *)s);
putchar('"');
for (i = 0; i < len; ++i) {
diff --git a/src/scanners.h b/src/scanners.h
index b6e586b..f96c42d 100644
--- a/src/scanners.h
+++ b/src/scanners.h
@@ -1,15 +1,15 @@
-#include "buffer.h"
+#include "stmd.h"
-int scan_autolink_uri(const gh_buf *s, int pos);
-int scan_autolink_email(const gh_buf *s, int pos);
-int scan_html_tag(const gh_buf *s, int pos);
-int scan_html_block_tag(const gh_buf *s, int pos);
-int scan_link_url(const gh_buf *s, int pos);
-int scan_link_title(const gh_buf *s, int pos);
-int scan_spacechars(const gh_buf *s, int pos);
-int scan_atx_header_start(const gh_buf *s, int pos);
-int scan_setext_header_line(const gh_buf *s, int pos);
-int scan_hrule(const gh_buf *s, int pos);
-int scan_open_code_fence(const gh_buf *s, int pos);
-int scan_close_code_fence(const gh_buf *s, int pos, int len);
-int scan_entity(const gh_buf *s, int pos);
+int scan_autolink_uri(chunk *c, int offset);
+int scan_autolink_email(chunk *c, int offset);
+int scan_html_tag(chunk *c, int offset);
+int scan_html_block_tag(chunk *c, int offset);
+int scan_link_url(chunk *c, int offset);
+int scan_link_title(chunk *c, int offset);
+int scan_spacechars(chunk *c, int offset);
+int scan_atx_header_start(chunk *c, int offset);
+int scan_setext_header_line(chunk *c, int offset);
+int scan_hrule(chunk *c, int offset);
+int scan_open_code_fence(chunk *c, int offset);
+int scan_close_code_fence(chunk *c, int offset, int len);
+int scan_entity(chunk *c, int offset);
diff --git a/src/scanners.re b/src/scanners.re
index 7323ef9..5ac7c15 100644
--- a/src/scanners.re
+++ b/src/scanners.re
@@ -1,8 +1,15 @@
-#include "buffer.h"
+#include "scanners.h"
+
+#define SCAN_DATA \
+ const unsigned char *marker = NULL; \
+ const unsigned char *p = c->data + offset; \
+ const unsigned char *start = p; \
+ const unsigned char *end = c->data + c->len
/*!re2c
re2c:define:YYCTYPE = "unsigned char";
re2c:define:YYCURSOR = p;
+ re2c:define:YYLIMIT = end;
re2c:define:YYMARKER = marker;
re2c:define:YYCTXMARKER = marker;
re2c:yyfill:enable = 0;
@@ -55,11 +62,9 @@
*/
// Try to match URI autolink after first <, returning number of chars matched.
-extern int scan_autolink_uri(const gh_buf *s, int pos)
+extern int scan_autolink_uri(chunk *c, int offset)
{
- unsigned char * marker = NULL;
- unsigned char * p = &(s->ptr[pos]);
- unsigned char * start = p;
+ SCAN_DATA;
/*!re2c
scheme [:]([^\x00-\x20<>\\]|escaped_char)*[>] { return (p - start); }
.? { return 0; }
@@ -67,11 +72,9 @@ extern int scan_autolink_uri(const gh_buf *s, int pos)
}
// Try to match email autolink after first <, returning num of chars matched.
-extern int scan_autolink_email(const gh_buf *s, int pos)
+extern int scan_autolink_email(chunk *c, int offset)
{
- unsigned char * marker = NULL;
- unsigned char * p = &(s->ptr[pos]);
- unsigned char * start = p;
+ SCAN_DATA;
/*!re2c
[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+
[@]
@@ -83,11 +86,9 @@ extern int scan_autolink_email(const gh_buf *s, int pos)
}
// Try to match an HTML tag after first <, returning num of chars matched.
-extern int scan_html_tag(const gh_buf *s, int pos)
+extern int scan_html_tag(chunk *c, int offset)
{
- unsigned char * marker = NULL;
- unsigned char * p = &(s->ptr[pos]);
- unsigned char * start = p;
+ SCAN_DATA;
/*!re2c
htmltag { return (p - start); }
.? { return 0; }
@@ -96,11 +97,9 @@ extern int scan_html_tag(const gh_buf *s, int pos)
// Try to match an HTML block tag including first <,
// returning num of chars matched.
-extern int scan_html_block_tag(const gh_buf *s, int pos)
+extern int scan_html_block_tag(chunk *c, int offset)
{
- unsigned char * marker = NULL;
- unsigned char * p = &(s->ptr[pos]);
- unsigned char * start = p;
+ SCAN_DATA;
/*!re2c
[<] [/] blocktagname (spacechar | [>]) { return (p - start); }
[<] blocktagname (spacechar | [/>]) { return (p - start); }
@@ -113,11 +112,9 @@ extern int scan_html_block_tag(const gh_buf *s, int pos)
// This may optionally be contained in <..>; otherwise
// whitespace and unbalanced right parentheses aren't allowed.
// Newlines aren't ever allowed.
-extern int scan_link_url(const gh_buf *s, int pos)
+extern int scan_link_url(chunk *c, int offset)
{
- unsigned char * marker = NULL;
- unsigned char * p = &(s->ptr[pos]);
- unsigned char * start = p;
+ SCAN_DATA;
/*!re2c
[ \n]* [<] ([^<>\n\\\x00] | escaped_char | [\\])* [>] { return (p - start); }
[ \n]* (reg_char+ | escaped_char | in_parens_nosp)* { return (p - start); }
@@ -128,11 +125,9 @@ extern int scan_link_url(const gh_buf *s, int pos)
// Try to match a link title (in single quotes, in double quotes, or
// in parentheses), returning number of chars matched. Allow one
// level of internal nesting (quotes within quotes).
-extern int scan_link_title(const gh_buf *s, int pos)
+extern int scan_link_title(chunk *c, int offset)
{
- unsigned char * marker = NULL;
- unsigned char * p = &(s->ptr[pos]);
- unsigned char * start = p;
+ SCAN_DATA;
/*!re2c
["] (escaped_char|[^"\x00])* ["] { return (p - start); }
['] (escaped_char|[^'\x00])* ['] { return (p - start); }
@@ -142,10 +137,9 @@ extern int scan_link_title(const gh_buf *s, int pos)
}
// Match space characters, including newlines.
-extern int scan_spacechars(const gh_buf *s, int pos)
+extern int scan_spacechars(chunk *c, int offset)
{
- unsigned char * p = &(s->ptr[pos]);
- unsigned char * start = p;
+ SCAN_DATA;
/*!re2c
[ \t\n]* { return (p - start); }
. { return 0; }
@@ -153,11 +147,9 @@ extern int scan_spacechars(const gh_buf *s, int pos)
}
// Match ATX header start.
-extern int scan_atx_header_start(const gh_buf *s, int pos)
+extern int scan_atx_header_start(chunk *c, int offset)
{
- unsigned char * marker = NULL;
- unsigned char * p = &(s->ptr[pos]);
- unsigned char * start = p;
+ SCAN_DATA;
/*!re2c
[#]{1,6} ([ ]+|[\n]) { return (p - start); }
.? { return 0; }
@@ -166,10 +158,9 @@ extern int scan_atx_header_start(const gh_buf *s, int pos)
// Match sexext header line. Return 1 for level-1 header,
// 2 for level-2, 0 for no match.
-extern int scan_setext_header_line(const gh_buf *s, int pos)
+extern int scan_setext_header_line(chunk *c, int offset)
{
- unsigned char * marker = NULL;
- unsigned char * p = &(s->ptr[pos]);
+ SCAN_DATA;
/*!re2c
[=]+ [ ]* [\n] { return 1; }
[-]+ [ ]* [\n] { return 2; }
@@ -180,11 +171,9 @@ extern int scan_setext_header_line(const gh_buf *s, int pos)
// Scan a horizontal rule line: "...three or more hyphens, asterisks,
// or underscores on a line by themselves. If you wish, you may use
// spaces between the hyphens or asterisks."
-extern int scan_hrule(const gh_buf *s, int pos)
+extern int scan_hrule(chunk *c, int offset)
{
- unsigned char * marker = NULL;
- unsigned char * p = &(s->ptr[pos]);
- unsigned char * start = p;
+ SCAN_DATA;
/*!re2c
([*][ ]*){3,} [ \t]* [\n] { return (p - start); }
([_][ ]*){3,} [ \t]* [\n] { return (p - start); }
@@ -194,11 +183,9 @@ extern int scan_hrule(const gh_buf *s, int pos)
}
// Scan an opening code fence.
-extern int scan_open_code_fence(const gh_buf *s, int pos)
+extern int scan_open_code_fence(chunk *c, int offset)
{
- unsigned char * marker = NULL;
- unsigned char * p = &(s->ptr[pos]);
- unsigned char * start = p;
+ SCAN_DATA;
/*!re2c
[`]{3,} / [^`\n\x00]*[\n] { return (p - start); }
[~]{3,} / [^~\n\x00]*[\n] { return (p - start); }
@@ -207,11 +194,9 @@ extern int scan_open_code_fence(const gh_buf *s, int pos)
}
// Scan a closing code fence with length at least len.
-extern int scan_close_code_fence(const gh_buf *s, int pos, int len)
+extern int scan_close_code_fence(chunk *c, int offset, int len)
{
- unsigned char * marker = NULL;
- unsigned char * p = &(s->ptr[pos]);
- unsigned char * start = p;
+ SCAN_DATA;
/*!re2c
([`]{3,} | [~]{3,}) / spacechar* [\n]
{ if (p - start > len) {
@@ -225,11 +210,9 @@ extern int scan_close_code_fence(const gh_buf *s, int pos, int len)
// Scans an entity.
// Returns number of chars matched.
-extern int scan_entity(const gh_buf *s, int pos)
+extern int scan_entity(chunk *c, int offset)
{
- unsigned char * marker = NULL;
- unsigned char * p = &(s->ptr[pos]);
- unsigned char * start = p;
+ SCAN_DATA;
/*!re2c
[&] ([#] ([Xx][A-Fa-f0-9]{1,8}|[0-9]{1,8}) |[A-Za-z][A-Za-z0-9]{1,31} ) [;]
{ return (p - start); }
diff --git a/src/stmd.h b/src/stmd.h
index 3e284bd..4a3c399 100644
--- a/src/stmd.h
+++ b/src/stmd.h
@@ -1,17 +1,15 @@
+#ifndef _STDMD_H_
+#define _STDMD_H_
+
#include <stdbool.h>
#include <stdio.h>
#include "buffer.h"
+#include "chunk.h"
#include "uthash.h"
#define VERSION "0.1"
#define CODE_INDENT 4
-typedef struct {
- const unsigned char *data;
- int len;
- int alloc;
-} chunk;
-
typedef struct Inline {
enum { INL_STRING, INL_SOFTBREAK, INL_LINEBREAK, INL_CODE, INL_RAW_HTML, INL_ENTITY,
INL_EMPH, INL_STRONG, INL_LINK, INL_IMAGE } tag;
@@ -79,7 +77,6 @@ typedef struct Block {
struct Block* parent;
struct Block* top;
gh_buf string_content;
- int string_pos;
inl* inline_content;
union {
struct ListData list_data;
@@ -91,10 +88,10 @@ typedef struct Block {
struct Block * prev;
} block;
-inl* parse_inlines(gh_buf *input, int input_pos, reference** refmap);
+inl* parse_inlines(gh_buf *input, reference** refmap);
void free_inlines(inl* e);
-int parse_reference(gh_buf *input, int input_pos, reference** refmap);
+int parse_reference(gh_buf *input, reference** refmap);
void free_reference(reference *ref);
void free_reference_map(reference **refmap);
@@ -117,3 +114,4 @@ void inlines_to_html(gh_buf *html, inl *b);
void utf8proc_case_fold(gh_buf *dest, const unsigned char *str, int len);
+#endif
--
cgit v1.2.3
From f5168c63ad305b3e331eb7d31efaf46b0541bba4 Mon Sep 17 00:00:00 2001
From: Vicent Marti
Date: Thu, 4 Sep 2014 06:41:18 +0200
Subject: 368/73
---
src/blocks.c | 1 +
src/inlines.c | 2 +-
2 files changed, 2 insertions(+), 1 deletion(-)
(limited to 'src/inlines.c')
diff --git a/src/blocks.c b/src/blocks.c
index 94ff986..bd25d6c 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -57,6 +57,7 @@ bool is_blank(gh_buf *s, int offset)
return true;
case ' ':
offset++;
+ break;
default:
return false;
}
diff --git a/src/inlines.c b/src/inlines.c
index ef27a24..ced4673 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -123,7 +123,7 @@ inline static inl* make_linkable(int t, inl* label, chunk url, chunk title)
e->tag = t;
e->content.linkable.label = label;
e->content.linkable.url = chunk_to_cstr(&url);
- e->content.linkable.title = url.len ? chunk_to_cstr(&title) : NULL;
+ e->content.linkable.title = title.len ? chunk_to_cstr(&title) : NULL;
e->next = NULL;
return e;
}
--
cgit v1.2.3
From 45c1d9fadb3e8aab4a01bb27a4e2ece379902d1a Mon Sep 17 00:00:00 2001
From: Vicent Marti
Date: Thu, 4 Sep 2014 17:26:11 +0200
Subject: 426/15
---
runtests.pl | 3 ++
spec.txt | 6 ++--
src/html/html.c | 22 +++++-------
src/inlines.c | 105 +++++++++++++++++++++++++++++---------------------------
4 files changed, 69 insertions(+), 67 deletions(-)
(limited to 'src/inlines.c')
diff --git a/runtests.pl b/runtests.pl
index 2e2b795..e53938d 100644
--- a/runtests.pl
+++ b/runtests.pl
@@ -49,6 +49,7 @@ sub tidy
s/ */ /;
# collapse space before /> in tag
s/ *\/>/\/>/;
+ s/>\n$/>/;
# skip blank line
if (/^$/) {
next;
@@ -89,8 +90,10 @@ sub dotest
print $markdown;
print "=== expected ===============\n";
print $html;
+ print "\n";
print "=== got ====================\n";
print $actual;
+ print "\n";
print color "black";
return 0;
}
diff --git a/spec.txt b/spec.txt
index 82ae0b6..d7e70f5 100644
--- a/spec.txt
+++ b/spec.txt
@@ -1682,7 +1682,7 @@ them.
[Foo bar]
.
-Foo bar
+Foo bar
.
The title may be omitted:
@@ -1745,7 +1745,7 @@ case-insensitive (see [matches](#matches)).
[αγω]
.
-αγω
+αγω
.
Here is a link reference definition with no corresponding link.
@@ -3688,7 +3688,7 @@ raw HTML:
.
.
-http://google.com?find=\*
+http://google.com?find=\*
.
.
diff --git a/src/html/html.c b/src/html/html.c
index 2a65a63..cdccf2a 100644
--- a/src/html/html.c
+++ b/src/html/html.c
@@ -50,17 +50,15 @@ void blocks_to_html(gh_buf *html, block *b, bool tight)
cr(html);
gh_buf_puts(html, "");
inlines_to_html(html, b->inline_content);
- gh_buf_puts(html, "
");
- cr(html);
+ gh_buf_puts(html, "
\n");
}
break;
case block_quote:
cr(html);
- gh_buf_puts(html, "");
+ gh_buf_puts(html, "\n");
blocks_to_html(html, b->children, false);
- gh_buf_puts(html, "
");
- cr(html);
+ gh_buf_puts(html, "
\n");
break;
case list_item:
@@ -68,8 +66,7 @@ void blocks_to_html(gh_buf *html, block *b, bool tight)
gh_buf_puts(html, "");
blocks_to_html(html, b->children, tight);
gh_buf_trim(html); /* TODO: rtrim */
- gh_buf_puts(html, "");
- cr(html);
+ gh_buf_puts(html, "\n");
break;
case list:
@@ -87,7 +84,7 @@ void blocks_to_html(gh_buf *html, block *b, bool tight)
blocks_to_html(html, b->children, data->tight);
gh_buf_puts(html, data->list_type == bullet ? "" : "");
- cr(html);
+ gh_buf_putc(html, '\n');
break;
case atx_header:
@@ -95,8 +92,7 @@ void blocks_to_html(gh_buf *html, block *b, bool tight)
cr(html);
gh_buf_printf(html, "", b->attributes.header_level);
inlines_to_html(html, b->inline_content);
- gh_buf_printf(html, "", b->attributes.header_level);
- cr(html);
+ gh_buf_printf(html, "\n", b->attributes.header_level);
break;
case indented_code:
@@ -122,8 +118,7 @@ void blocks_to_html(gh_buf *html, block *b, bool tight)
gh_buf_puts(html, ">");
escape_html(html, b->string_content.ptr, b->string_content.size);
- gh_buf_puts(html, "
");
- cr(html);
+ gh_buf_puts(html, "\n");
break;
case html_block:
@@ -131,8 +126,7 @@ void blocks_to_html(gh_buf *html, block *b, bool tight)
break;
case hrule:
- gh_buf_puts(html, "
");
- cr(html);
+ gh_buf_puts(html, "
\n");
break;
case reference_def:
diff --git a/src/inlines.c b/src/inlines.c
index ced4673..a0dcac9 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -1,8 +1,8 @@
#include
+#include
#include
#include
#include
-#include
#include "stmd.h"
#include "uthash.h"
@@ -18,7 +18,7 @@ typedef struct Subject {
reference* lookup_reference(reference** refmap, chunk *label);
reference* make_reference(chunk *label, chunk *url, chunk *title);
-static unsigned char *clean_url(chunk *url);
+static unsigned char *clean_url(chunk *url, int is_email);
static unsigned char *clean_title(chunk *title);
inline static unsigned char *chunk_to_cstr(chunk *c);
@@ -97,7 +97,7 @@ extern reference* make_reference(chunk *label, chunk *url, chunk *title)
reference *ref;
ref = malloc(sizeof(reference));
ref->label = normalize_reference(label);
- ref->url = clean_url(url);
+ ref->url = clean_url(url, 0);
ref->title = clean_title(title);
return ref;
}
@@ -116,14 +116,25 @@ extern void add_reference(reference** refmap, reference* ref)
}
}
+inline static inl* make_link_from_reference(inl* label, reference *ref)
+{
+ inl* e = (inl*) malloc(sizeof(inl));
+ e->tag = INL_LINK;
+ e->content.linkable.label = label;
+ e->content.linkable.url = strdup(ref->url);
+ e->content.linkable.title = ref->title ? strdup(ref->title) : NULL;
+ e->next = NULL;
+ return e;
+}
+
// Create an inline with a linkable string value.
-inline static inl* make_linkable(int t, inl* label, chunk url, chunk title)
+inline static inl* make_link(inl* label, chunk url, chunk title, int is_email)
{
inl* e = (inl*) malloc(sizeof(inl));
- e->tag = t;
+ e->tag = INL_LINK;
e->content.linkable.label = label;
- e->content.linkable.url = chunk_to_cstr(&url);
- e->content.linkable.title = title.len ? chunk_to_cstr(&title) : NULL;
+ e->content.linkable.url = clean_url(&url, is_email);
+ e->content.linkable.title = clean_title(&title);
e->next = NULL;
return e;
}
@@ -163,7 +174,6 @@ inline static inl* make_simple(int t)
#define make_entity(s) make_literal(INL_ENTITY, s)
#define make_linebreak() make_simple(INL_LINEBREAK)
#define make_softbreak() make_simple(INL_SOFTBREAK)
-#define make_link(label, url, title) make_linkable(INL_LINK, label, url, title)
#define make_emph(contents) make_inlines(INL_EMPH, contents)
#define make_strong(contents) make_inlines(INL_STRONG, contents)
@@ -309,37 +319,27 @@ static int scan_to_closing_backticks(subject* subj, int openticklength)
// space and newline characters into a single space.
static void normalize_whitespace(gh_buf *s)
{
- /* TODO */
-#if 0
bool last_char_was_space = false;
- int pos = 0;
- char c;
- while ((c = gh_buf_at(s, pos))) {
- switch (c) {
- case ' ':
- if (last_char_was_space) {
- bdelete(s, pos, 1);
- } else {
- pos++;
- }
- last_char_was_space = true;
- break;
- case '\n':
- if (last_char_was_space) {
- bdelete(s, pos, 1);
- } else {
- bdelete(s, pos, 1);
- binsertch(s, pos, 1, ' ');
- pos++;
- }
- last_char_was_space = true;
+ int r, w;
+
+ for (r = 0, w = 0; r < s->size; ++r) {
+ switch (s->ptr[r]) {
+ case ' ':
+ case '\n':
+ if (last_char_was_space)
break;
- default:
- pos++;
- last_char_was_space = false;
+
+ s->ptr[w++] = ' ';
+ last_char_was_space = true;
+ break;
+
+ default:
+ s->ptr[w++] = s->ptr[r];
+ last_char_was_space = false;
}
}
-#endif
+
+ gh_buf_truncate(s, w);
}
// Parse backtick code section or raw backticks, return an inline.
@@ -593,16 +593,19 @@ extern void unescape_buffer(gh_buf *buf)
// Clean a URL: remove surrounding whitespace and surrounding <>,
// and remove \ that escape punctuation.
-static unsigned char *clean_url(chunk *url)
+static unsigned char *clean_url(chunk *url, int is_email)
{
gh_buf buf = GH_BUF_INIT;
chunk_trim(url);
+ if (is_email)
+ gh_buf_puts(&buf, "mailto:");
+
if (url->data[0] == '<' && url->data[url->len - 1] == '>') {
- gh_buf_set(&buf, url->data + 1, url->len - 2);
+ gh_buf_put(&buf, url->data + 1, url->len - 2);
} else {
- gh_buf_set(&buf, url->data, url->len);
+ gh_buf_put(&buf, url->data, url->len);
}
unescape_buffer(&buf);
@@ -613,8 +616,13 @@ static unsigned char *clean_url(chunk *url)
static unsigned char *clean_title(chunk *title)
{
gh_buf buf = GH_BUF_INIT;
- unsigned char first = title->data[0];
- unsigned char last = title->data[title->len - 1];
+ unsigned char first, last;
+
+ if (title->len == 0)
+ return NULL;
+
+ first = title->data[0];
+ last = title->data[title->len - 1];
// remove surrounding quotes if any:
if ((first == '\'' && last == '\'') ||
@@ -647,25 +655,22 @@ static inl* handle_pointy_brace(subject* subj)
return make_link(
make_str_with_entities(&contents),
contents,
- chunk_literal("")
+ chunk_literal(""),
+ 0
);
}
// next try to match an email autolink
matchlen = scan_autolink_email(&subj->input, subj->pos);
if (matchlen > 0) {
- gh_buf mail_url = GH_BUF_INIT;
-
contents = chunk_dup(&subj->input, subj->pos, matchlen - 1);
subj->pos += matchlen;
- gh_buf_puts(&mail_url, "mailto:");
- gh_buf_put(&mail_url, contents.data, contents.len);
-
return make_link(
make_str_with_entities(&contents),
- chunk_buf_detach(&mail_url),
- chunk_literal("")
+ contents,
+ chunk_literal(""),
+ 1
);
}
@@ -790,7 +795,7 @@ static inl* handle_left_bracket(subject* subj)
title = chunk_dup(&subj->input, starttitle, endtitle - starttitle);
lab = parse_chunk_inlines(&rawlabel, NULL);
- return make_link(lab, url, title);
+ return make_link(lab, url, title, 0);
} else {
// if we get here, we matched a label but didn't get further:
subj->pos = endlabel;
@@ -821,7 +826,7 @@ static inl* handle_left_bracket(subject* subj)
ref = lookup_reference(subj->reference_map, &reflabel);
if (ref != NULL) { // found
lab = parse_chunk_inlines(&rawlabel, NULL);
- result = make_link(lab, chunk_literal(ref->url), chunk_literal(ref->title));
+ result = make_link_from_reference(lab, ref);
} else {
subj->pos = endlabel;
lab = parse_chunk_inlines(&rawlabel, subj->reference_map);
--
cgit v1.2.3
From 9830d3a05a374a0d05676301bd4065917b59ad53 Mon Sep 17 00:00:00 2001
From: Vicent Marti
Date: Thu, 4 Sep 2014 17:42:12 +0200
Subject: 430/11
---
src/html/houdini_html_e.c | 4 ++--
src/html/html.c | 5 ++---
src/inlines.c | 14 +++-----------
3 files changed, 7 insertions(+), 16 deletions(-)
(limited to 'src/inlines.c')
diff --git a/src/html/houdini_html_e.c b/src/html/houdini_html_e.c
index 5cdd3dd..95b6c41 100644
--- a/src/html/houdini_html_e.c
+++ b/src/html/houdini_html_e.c
@@ -62,8 +62,8 @@ houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure)
break;
/* The forward slash is only escaped in secure mode */
- if (src[i] == '/' && !secure) {
- gh_buf_putc(ob, '/');
+ if ((src[i] == '/' || src[i] == '\'') && !secure) {
+ gh_buf_putc(ob, src[i]);
} else {
gh_buf_puts(ob, HTML_ESCAPES[esc]);
}
diff --git a/src/html/html.c b/src/html/html.c
index cdccf2a..913a602 100644
--- a/src/html/html.c
+++ b/src/html/html.c
@@ -191,10 +191,9 @@ void inlines_to_html(gh_buf *html, inl* ils)
escape_href(html, ils->content.linkable.url, -1);
inlines_to_html(&scrap, ils->content.inlines);
- if (scrap.size) {
- gh_buf_puts(html, "\" alt=\"");
+ gh_buf_puts(html, "\" alt=\"");
+ if (scrap.size)
escape_html(html, scrap.ptr, scrap.size);
- }
gh_buf_clear(&scrap);
if (ils->content.linkable.title) {
diff --git a/src/inlines.c b/src/inlines.c
index a0dcac9..599be84 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -21,7 +21,6 @@ reference* make_reference(chunk *label, chunk *url, chunk *title);
static unsigned char *clean_url(chunk *url, int is_email);
static unsigned char *clean_title(chunk *title);
-inline static unsigned char *chunk_to_cstr(chunk *c);
inline static void chunk_free(chunk *c);
inline static void chunk_trim(chunk *c);
@@ -37,6 +36,8 @@ static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap);
static void subject_from_buf(subject *e, gh_buf *buffer, reference** refmap);
static int subject_find_special_char(subject *subj);
+static void normalize_whitespace(gh_buf *s);
+
extern void free_reference(reference *ref) {
free(ref->label);
free(ref->url);
@@ -62,19 +63,10 @@ extern void free_reference_map(reference **refmap) {
static unsigned char *normalize_reference(chunk *ref)
{
gh_buf normalized = GH_BUF_INIT;
- int r, w;
utf8proc_case_fold(&normalized, ref->data, ref->len);
gh_buf_trim(&normalized);
-
- for (r = 0, w = 0; r < normalized.size; ++r) {
- if (r && gh_buf_at(&normalized, r - 1) == ' ') {
- while (gh_buf_at(&normalized, r) == ' ')
- r++;
- }
-
- normalized.ptr[w++] = normalized.ptr[r];
- }
+ normalize_whitespace(&normalized);
return gh_buf_detach(&normalized);
}
--
cgit v1.2.3
From d8f44f1e4f0bd944ab43e6434a1579d670ed66cf Mon Sep 17 00:00:00 2001
From: Vicent Marti
Date: Thu, 4 Sep 2014 17:49:13 +0200
Subject: 433/8
---
spec.txt | 2 +-
src/html/html.c | 6 ++++--
src/inlines.c | 3 +++
src/print.c | 5 ++++-
4 files changed, 12 insertions(+), 4 deletions(-)
(limited to 'src/inlines.c')
diff --git a/spec.txt b/spec.txt
index d7e70f5..cfda2a3 100644
--- a/spec.txt
+++ b/spec.txt
@@ -3946,7 +3946,7 @@ But this is a link:
.
`
.
-http://foo.bar.`baz`
+http://foo.bar.`baz`
.
And this is an HTML tag:
diff --git a/src/html/html.c b/src/html/html.c
index 913a602..41b8fda 100644
--- a/src/html/html.c
+++ b/src/html/html.c
@@ -174,7 +174,8 @@ void inlines_to_html(gh_buf *html, inl* ils)
case INL_LINK:
gh_buf_puts(html, "content.linkable.url, -1);
+ if (ils->content.linkable.url)
+ escape_href(html, ils->content.linkable.url, -1);
if (ils->content.linkable.title) {
gh_buf_puts(html, "\" title=\"");
@@ -188,7 +189,8 @@ void inlines_to_html(gh_buf *html, inl* ils)
case INL_IMAGE:
gh_buf_puts(html, "
content.linkable.url, -1);
+ if (ils->content.linkable.url)
+ escape_href(html, ils->content.linkable.url, -1);
inlines_to_html(&scrap, ils->content.inlines);
gh_buf_puts(html, "\" alt=\"");
diff --git a/src/inlines.c b/src/inlines.c
index 599be84..8e2e683 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -591,6 +591,9 @@ static unsigned char *clean_url(chunk *url, int is_email)
chunk_trim(url);
+ if (url->len == 0)
+ return NULL;
+
if (is_email)
gh_buf_puts(&buf, "mailto:");
diff --git a/src/print.c b/src/print.c
index c262995..832ad4f 100644
--- a/src/print.c
+++ b/src/print.c
@@ -153,7 +153,10 @@ extern void print_inlines(inl* ils, int indent)
case INL_LINK:
case INL_IMAGE:
printf("%s url=", ils->tag == INL_LINK ? "link" : "image");
- print_str(ils->content.linkable.url, -1);
+
+ if (ils->content.linkable.url)
+ print_str(ils->content.linkable.url, -1);
+
if (ils->content.linkable.title) {
printf(" title=");
print_str(ils->content.linkable.title, -1);
--
cgit v1.2.3
From 543c2c94d71adee42c7bd2f8027d75c87ed8120d Mon Sep 17 00:00:00 2001
From: Vicent Marti
Date: Thu, 4 Sep 2014 18:38:14 +0200
Subject: Rename to strbuf
---
src/blocks.c | 64 +++++++++++++++----------------
src/buffer.c | 86 ++++++++++++++++++++---------------------
src/buffer.h | 80 +++++++++++++++++++-------------------
src/chunk.h | 4 +-
src/html/houdini.h | 22 +++++------
src/html/houdini_href_e.c | 12 +++---
src/html/houdini_html_e.c | 10 ++---
src/html/html.c | 98 +++++++++++++++++++++++------------------------
src/inlines.c | 50 ++++++++++++------------
src/main.c | 4 +-
src/stmd.h | 16 ++++----
src/utf8.c | 6 +--
12 files changed, 226 insertions(+), 226 deletions(-)
(limited to 'src/inlines.c')
diff --git a/src/blocks.c b/src/blocks.c
index cf0e9e4..9faccd9 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -10,7 +10,7 @@
#define peek_at(i, n) (i)->data[n]
-static void incorporate_line(gh_buf *ln, int line_number, block** curptr);
+static void incorporate_line(strbuf *ln, int line_number, block** curptr);
static void finalize(block* b, int line_number);
static block* make_block(int tag, int start_line, int start_column)
@@ -28,7 +28,7 @@ static block* make_block(int tag, int start_line, int start_column)
e->parent = NULL;
e->top = NULL;
e->attributes.refmap = NULL;
- gh_buf_init(&e->string_content, 32);
+ strbuf_init(&e->string_content, 32);
e->inline_content = NULL;
e->next = NULL;
e->prev = NULL;
@@ -49,7 +49,7 @@ extern block* make_document()
}
// Returns true if line has only space characters, else false.
-bool is_blank(gh_buf *s, int offset)
+bool is_blank(strbuf *s, int offset)
{
while (offset < s->size) {
switch (s->ptr[offset]) {
@@ -85,10 +85,10 @@ static inline bool accepts_lines(int block_type)
static void add_line(block* block, chunk *ch, int offset)
{
assert(block->open);
- gh_buf_put(&block->string_content, ch->data + offset, ch->len - offset);
+ strbuf_put(&block->string_content, ch->data + offset, ch->len - offset);
}
-static void remove_trailing_blank_lines(gh_buf *ln)
+static void remove_trailing_blank_lines(strbuf *ln)
{
int i;
@@ -100,13 +100,13 @@ static void remove_trailing_blank_lines(gh_buf *ln)
}
if (i < 0) {
- gh_buf_clear(ln);
+ strbuf_clear(ln);
return;
}
- i = gh_buf_strchr(ln, '\n', i);
+ i = strbuf_strchr(ln, '\n', i);
if (i >= 0)
- gh_buf_truncate(ln, i);
+ strbuf_truncate(ln, i);
}
// Check to see if a block ends with a blank line, descending
@@ -164,10 +164,10 @@ static void finalize(block* b, int line_number)
switch (b->tag) {
case paragraph:
pos = 0;
- while (gh_buf_at(&b->string_content, 0) == '[' &&
+ while (strbuf_at(&b->string_content, 0) == '[' &&
(pos = parse_reference(&b->string_content, b->top->attributes.refmap))) {
- gh_buf_drop(&b->string_content, pos);
+ strbuf_drop(&b->string_content, pos);
}
if (is_blank(&b->string_content, 0)) {
b->tag = reference_def;
@@ -176,23 +176,23 @@ static void finalize(block* b, int line_number)
case indented_code:
remove_trailing_blank_lines(&b->string_content);
- gh_buf_putc(&b->string_content, '\n');
+ strbuf_putc(&b->string_content, '\n');
break;
case fenced_code:
// first line of contents becomes info
- firstlinelen = gh_buf_strchr(&b->string_content, '\n', 0);
+ firstlinelen = strbuf_strchr(&b->string_content, '\n', 0);
- gh_buf_init(&b->attributes.fenced_code_data.info, 0);
- gh_buf_set(
+ strbuf_init(&b->attributes.fenced_code_data.info, 0);
+ strbuf_set(
&b->attributes.fenced_code_data.info,
b->string_content.ptr,
firstlinelen
);
- gh_buf_drop(&b->string_content, firstlinelen + 1);
+ strbuf_drop(&b->string_content, firstlinelen + 1);
- gh_buf_trim(&b->attributes.fenced_code_data.info);
+ strbuf_trim(&b->attributes.fenced_code_data.info);
unescape_buffer(&b->attributes.fenced_code_data.info);
break;
@@ -265,9 +265,9 @@ extern void free_blocks(block* e)
while (e != NULL) {
next = e->next;
free_inlines(e->inline_content);
- gh_buf_free(&e->string_content);
+ strbuf_free(&e->string_content);
if (e->tag == fenced_code) {
- gh_buf_free(&e->attributes.fenced_code_data.info);
+ strbuf_free(&e->attributes.fenced_code_data.info);
} else if (e->tag == document) {
free_reference_map(e->attributes.refmap);
}
@@ -287,7 +287,7 @@ void process_inlines(block* cur, reference** refmap)
case setext_header:
cur->inline_content = parse_inlines(&cur->string_content, refmap);
// MEM
- // gh_buf_free(&cur->string_content);
+ // strbuf_free(&cur->string_content);
break;
default:
@@ -369,7 +369,7 @@ static int lists_match(struct ListData list_data,
list_data.bullet_char == item_data.bullet_char);
}
-static void expand_tabs(gh_buf *ob, const unsigned char *line, size_t size)
+static void expand_tabs(strbuf *ob, const unsigned char *line, size_t size)
{
size_t i = 0, tab = 0;
@@ -381,13 +381,13 @@ static void expand_tabs(gh_buf *ob, const unsigned char *line, size_t size)
}
if (i > org)
- gh_buf_put(ob, line + org, i - org);
+ strbuf_put(ob, line + org, i - org);
if (i >= size)
break;
do {
- gh_buf_putc(ob, ' '); tab++;
+ strbuf_putc(ob, ' '); tab++;
} while (tab % 4);
i++;
@@ -409,7 +409,7 @@ static block *finalize_document(block *document, int linenum)
extern block *stmd_parse_file(FILE *f)
{
- gh_buf line = GH_BUF_INIT;
+ strbuf line = GH_BUF_INIT;
unsigned char buffer[4096];
int linenum = 1;
block *document = make_document();
@@ -417,17 +417,17 @@ extern block *stmd_parse_file(FILE *f)
while (fgets((char *)buffer, sizeof(buffer), f)) {
expand_tabs(&line, buffer, strlen((char *)buffer));
incorporate_line(&line, linenum, &document);
- gh_buf_clear(&line);
+ strbuf_clear(&line);
linenum++;
}
- gh_buf_free(&line);
+ strbuf_free(&line);
return finalize_document(document, linenum);
}
extern block *stmd_parse_document(const unsigned char *buffer, size_t len)
{
- gh_buf line = GH_BUF_INIT;
+ strbuf line = GH_BUF_INIT;
int linenum = 1;
const unsigned char *end = buffer + len;
block *document = make_document();
@@ -444,11 +444,11 @@ extern block *stmd_parse_document(const unsigned char *buffer, size_t len)
}
incorporate_line(&line, linenum, &document);
- gh_buf_clear(&line);
+ strbuf_clear(&line);
linenum++;
}
- gh_buf_free(&line);
+ strbuf_free(&line);
return finalize_document(document, linenum);
}
@@ -471,7 +471,7 @@ static void chop_trailing_hashtags(chunk *ch)
}
// Process one line at a time, modifying a block.
-static void incorporate_line(gh_buf *line, int line_number, block** curptr)
+static void incorporate_line(strbuf *line, int line_number, block** curptr)
{
block* last_matched_container;
int offset = 0;
@@ -639,8 +639,8 @@ static void incorporate_line(gh_buf *line, int line_number, block** curptr)
} else if (container->tag == paragraph &&
(lev = scan_setext_header_line(&input, first_nonspace)) &&
// check that there is only one line in the paragraph:
- gh_buf_strrchr(&container->string_content, '\n',
- gh_buf_len(&container->string_content) - 2) < 0) {
+ strbuf_strrchr(&container->string_content, '\n',
+ strbuf_len(&container->string_content) - 2) < 0) {
container->tag = setext_header;
container->attributes.header_level = lev;
@@ -734,7 +734,7 @@ static void incorporate_line(gh_buf *line, int line_number, block** curptr)
container == last_matched_container &&
!blank &&
cur->tag == paragraph &&
- gh_buf_len(&cur->string_content) > 0) {
+ strbuf_len(&cur->string_content) > 0) {
add_line(cur, &input, offset);
diff --git a/src/buffer.c b/src/buffer.c
index dc4a405..90c2186 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -9,32 +9,32 @@
#include "buffer.h"
-/* Used as default value for gh_buf->ptr so that people can always
- * assume ptr is non-NULL and zero terminated even for new gh_bufs.
+/* Used as default value for strbuf->ptr so that people can always
+ * assume ptr is non-NULL and zero terminated even for new strbufs.
*/
-unsigned char gh_buf__initbuf[1];
-unsigned char gh_buf__oom[1];
+unsigned char strbuf__initbuf[1];
+unsigned char strbuf__oom[1];
#define ENSURE_SIZE(b, d) \
- if ((d) > buf->asize && gh_buf_grow(b, (d)) < 0)\
+ if ((d) > buf->asize && strbuf_grow(b, (d)) < 0)\
return -1;
-void gh_buf_init(gh_buf *buf, int initial_size)
+void strbuf_init(strbuf *buf, int initial_size)
{
buf->asize = 0;
buf->size = 0;
- buf->ptr = gh_buf__initbuf;
+ buf->ptr = strbuf__initbuf;
if (initial_size)
- gh_buf_grow(buf, initial_size);
+ strbuf_grow(buf, initial_size);
}
-int gh_buf_try_grow(gh_buf *buf, int target_size, bool mark_oom)
+int strbuf_try_grow(strbuf *buf, int target_size, bool mark_oom)
{
unsigned char *new_ptr;
int new_size;
- if (buf->ptr == gh_buf__oom)
+ if (buf->ptr == strbuf__oom)
return -1;
if (target_size <= buf->asize)
@@ -60,7 +60,7 @@ int gh_buf_try_grow(gh_buf *buf, int target_size, bool mark_oom)
if (!new_ptr) {
if (mark_oom)
- buf->ptr = gh_buf__oom;
+ buf->ptr = strbuf__oom;
return -1;
}
@@ -75,17 +75,17 @@ int gh_buf_try_grow(gh_buf *buf, int target_size, bool mark_oom)
return 0;
}
-void gh_buf_free(gh_buf *buf)
+void strbuf_free(strbuf *buf)
{
if (!buf) return;
- if (buf->ptr != gh_buf__initbuf && buf->ptr != gh_buf__oom)
+ if (buf->ptr != strbuf__initbuf && buf->ptr != strbuf__oom)
free(buf->ptr);
- gh_buf_init(buf, 0);
+ strbuf_init(buf, 0);
}
-void gh_buf_clear(gh_buf *buf)
+void strbuf_clear(strbuf *buf)
{
buf->size = 0;
@@ -93,10 +93,10 @@ void gh_buf_clear(gh_buf *buf)
buf->ptr[0] = '\0';
}
-int gh_buf_set(gh_buf *buf, const unsigned char *data, int len)
+int strbuf_set(strbuf *buf, const unsigned char *data, int len)
{
if (len <= 0 || data == NULL) {
- gh_buf_clear(buf);
+ strbuf_clear(buf);
} else {
if (data != buf->ptr) {
ENSURE_SIZE(buf, len + 1);
@@ -108,14 +108,14 @@ int gh_buf_set(gh_buf *buf, const unsigned char *data, int len)
return 0;
}
-int gh_buf_sets(gh_buf *buf, const char *string)
+int strbuf_sets(strbuf *buf, const char *string)
{
- return gh_buf_set(buf,
+ return strbuf_set(buf,
(const unsigned char *)string,
string ? strlen(string) : 0);
}
-int gh_buf_putc(gh_buf *buf, int c)
+int strbuf_putc(strbuf *buf, int c)
{
ENSURE_SIZE(buf, buf->size + 2);
buf->ptr[buf->size++] = c;
@@ -123,7 +123,7 @@ int gh_buf_putc(gh_buf *buf, int c)
return 0;
}
-int gh_buf_put(gh_buf *buf, const unsigned char *data, int len)
+int strbuf_put(strbuf *buf, const unsigned char *data, int len)
{
if (len <= 0)
return 0;
@@ -135,12 +135,12 @@ int gh_buf_put(gh_buf *buf, const unsigned char *data, int len)
return 0;
}
-int gh_buf_puts(gh_buf *buf, const char *string)
+int strbuf_puts(strbuf *buf, const char *string)
{
- return gh_buf_put(buf, (const unsigned char *)string, strlen(string));
+ return strbuf_put(buf, (const unsigned char *)string, strlen(string));
}
-int gh_buf_vprintf(gh_buf *buf, const char *format, va_list ap)
+int strbuf_vprintf(strbuf *buf, const char *format, va_list ap)
{
const int expected_size = buf->size + (strlen(format) * 2);
int len;
@@ -159,7 +159,7 @@ int gh_buf_vprintf(gh_buf *buf, const char *format, va_list ap)
if (len < 0) {
free(buf->ptr);
- buf->ptr = gh_buf__oom;
+ buf->ptr = strbuf__oom;
return -1;
}
@@ -174,19 +174,19 @@ int gh_buf_vprintf(gh_buf *buf, const char *format, va_list ap)
return 0;
}
-int gh_buf_printf(gh_buf *buf, const char *format, ...)
+int strbuf_printf(strbuf *buf, const char *format, ...)
{
int r;
va_list ap;
va_start(ap, format);
- r = gh_buf_vprintf(buf, format, ap);
+ r = strbuf_vprintf(buf, format, ap);
va_end(ap);
return r;
}
-void gh_buf_copy_cstr(char *data, int datasize, const gh_buf *buf)
+void strbuf_copy_cstr(char *data, int datasize, const strbuf *buf)
{
int copylen;
@@ -204,28 +204,28 @@ void gh_buf_copy_cstr(char *data, int datasize, const gh_buf *buf)
data[copylen] = '\0';
}
-void gh_buf_swap(gh_buf *buf_a, gh_buf *buf_b)
+void strbuf_swap(strbuf *buf_a, strbuf *buf_b)
{
- gh_buf t = *buf_a;
+ strbuf t = *buf_a;
*buf_a = *buf_b;
*buf_b = t;
}
-unsigned char *gh_buf_detach(gh_buf *buf)
+unsigned char *strbuf_detach(strbuf *buf)
{
unsigned char *data = buf->ptr;
- if (buf->asize == 0 || buf->ptr == gh_buf__oom)
+ if (buf->asize == 0 || buf->ptr == strbuf__oom)
return NULL;
- gh_buf_init(buf, 0);
+ strbuf_init(buf, 0);
return data;
}
-void gh_buf_attach(gh_buf *buf, unsigned char *ptr, int asize)
+void strbuf_attach(strbuf *buf, unsigned char *ptr, int asize)
{
- gh_buf_free(buf);
+ strbuf_free(buf);
if (ptr) {
buf->ptr = ptr;
@@ -235,18 +235,18 @@ void gh_buf_attach(gh_buf *buf, unsigned char *ptr, int asize)
else /* pass 0 to fall back on strlen + 1 */
buf->asize = buf->size + 1;
} else {
- gh_buf_grow(buf, asize);
+ strbuf_grow(buf, asize);
}
}
-int gh_buf_cmp(const gh_buf *a, const gh_buf *b)
+int strbuf_cmp(const strbuf *a, const strbuf *b)
{
int result = memcmp(a->ptr, b->ptr, MIN(a->size, b->size));
return (result != 0) ? result :
(a->size < b->size) ? -1 : (a->size > b->size) ? 1 : 0;
}
-int gh_buf_strchr(const gh_buf *buf, int c, int pos)
+int strbuf_strchr(const strbuf *buf, int c, int pos)
{
const unsigned char *p = memchr(buf->ptr + pos, c, buf->size - pos);
if (!p)
@@ -255,7 +255,7 @@ int gh_buf_strchr(const gh_buf *buf, int c, int pos)
return (int)(p - (const unsigned char *)buf->ptr);
}
-int gh_buf_strrchr(const gh_buf *buf, int c, int pos)
+int strbuf_strrchr(const strbuf *buf, int c, int pos)
{
int i;
@@ -267,7 +267,7 @@ int gh_buf_strrchr(const gh_buf *buf, int c, int pos)
return -1;
}
-void gh_buf_truncate(gh_buf *buf, int len)
+void strbuf_truncate(strbuf *buf, int len)
{
if (len < buf->size) {
buf->size = len;
@@ -275,7 +275,7 @@ void gh_buf_truncate(gh_buf *buf, int len)
}
}
-void gh_buf_drop(gh_buf *buf, int n)
+void strbuf_drop(strbuf *buf, int n)
{
if (n > 0) {
buf->size = buf->size - n;
@@ -286,7 +286,7 @@ void gh_buf_drop(gh_buf *buf, int n)
}
}
-void gh_buf_trim(gh_buf *buf)
+void strbuf_trim(strbuf *buf)
{
int i = 0;
@@ -296,7 +296,7 @@ void gh_buf_trim(gh_buf *buf)
while (i < buf->size && isspace(buf->ptr[i]))
i++;
- gh_buf_drop(buf, i);
+ strbuf_drop(buf, i);
/* rtrim */
while (buf->size > 0) {
diff --git a/src/buffer.h b/src/buffer.h
index 0d5143e..6f45cbb 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -9,20 +9,20 @@
typedef struct {
unsigned char *ptr;
int asize, size;
-} gh_buf;
+} strbuf;
-extern unsigned char gh_buf__initbuf[];
-extern unsigned char gh_buf__oom[];
+extern unsigned char strbuf__initbuf[];
+extern unsigned char strbuf__oom[];
-#define GH_BUF_INIT { gh_buf__initbuf, 0, 0 }
+#define GH_BUF_INIT { strbuf__initbuf, 0, 0 }
/**
- * Initialize a gh_buf structure.
+ * Initialize a strbuf structure.
*
* For the cases where GH_BUF_INIT cannot be used to do static
* initialization.
*/
-extern void gh_buf_init(gh_buf *buf, int initial_size);
+extern void strbuf_init(strbuf *buf, int initial_size);
/**
* Attempt to grow the buffer to hold at least `target_size` bytes.
@@ -32,7 +32,7 @@ extern void gh_buf_init(gh_buf *buf, int initial_size);
* existing buffer content will be preserved, but calling code must handle
* that buffer was not expanded.
*/
-extern int gh_buf_try_grow(gh_buf *buf, int target_size, bool mark_oom);
+extern int strbuf_try_grow(strbuf *buf, int target_size, bool mark_oom);
/**
* Grow the buffer to hold at least `target_size` bytes.
@@ -42,71 +42,71 @@ extern int gh_buf_try_grow(gh_buf *buf, int target_size, bool mark_oom);
*
* @return 0 on success or -1 on failure
*/
-static inline int gh_buf_grow(gh_buf *buf, int target_size)
+static inline int strbuf_grow(strbuf *buf, int target_size)
{
- return gh_buf_try_grow(buf, target_size, true);
+ return strbuf_try_grow(buf, target_size, true);
}
-extern void gh_buf_free(gh_buf *buf);
-extern void gh_buf_swap(gh_buf *buf_a, gh_buf *buf_b);
+extern void strbuf_free(strbuf *buf);
+extern void strbuf_swap(strbuf *buf_a, strbuf *buf_b);
/**
- * Test if there have been any reallocation failures with this gh_buf.
+ * Test if there have been any reallocation failures with this strbuf.
*
- * Any function that writes to a gh_buf can fail due to memory allocation
- * issues. If one fails, the gh_buf will be marked with an OOM error and
- * further calls to modify the buffer will fail. Check gh_buf_oom() at the
+ * Any function that writes to a strbuf can fail due to memory allocation
+ * issues. If one fails, the strbuf will be marked with an OOM error and
+ * further calls to modify the buffer will fail. Check strbuf_oom() at the
* end of your sequence and it will be true if you ran out of memory at any
* point with that buffer.
*
* @return false if no error, true if allocation error
*/
-static inline bool gh_buf_oom(const gh_buf *buf)
+static inline bool strbuf_oom(const strbuf *buf)
{
- return (buf->ptr == gh_buf__oom);
+ return (buf->ptr == strbuf__oom);
}
-static inline size_t gh_buf_len(const gh_buf *buf)
+static inline size_t strbuf_len(const strbuf *buf)
{
return buf->size;
}
-extern int gh_buf_cmp(const gh_buf *a, const gh_buf *b);
+extern int strbuf_cmp(const strbuf *a, const strbuf *b);
-extern void gh_buf_attach(gh_buf *buf, unsigned char *ptr, int asize);
-extern unsigned char *gh_buf_detach(gh_buf *buf);
-extern void gh_buf_copy_cstr(char *data, int datasize, const gh_buf *buf);
+extern void strbuf_attach(strbuf *buf, unsigned char *ptr, int asize);
+extern unsigned char *strbuf_detach(strbuf *buf);
+extern void strbuf_copy_cstr(char *data, int datasize, const strbuf *buf);
-static inline const char *gh_buf_cstr(const gh_buf *buf)
+static inline const char *strbuf_cstr(const strbuf *buf)
{
return (char *)buf->ptr;
}
-#define gh_buf_at(buf, n) ((buf)->ptr[n])
+#define strbuf_at(buf, n) ((buf)->ptr[n])
/*
* Functions below that return int value error codes will return 0 on
* success or -1 on failure (which generally means an allocation failed).
- * Using a gh_buf where the allocation has failed with result in -1 from
+ * Using a strbuf where the allocation has failed with result in -1 from
* all further calls using that buffer. As a result, you can ignore the
* return code of these functions and call them in a series then just call
- * gh_buf_oom at the end.
+ * strbuf_oom at the end.
*/
-extern int gh_buf_set(gh_buf *buf, const unsigned char *data, int len);
-extern int gh_buf_sets(gh_buf *buf, const char *string);
-extern int gh_buf_putc(gh_buf *buf, int c);
-extern int gh_buf_put(gh_buf *buf, const unsigned char *data, int len);
-extern int gh_buf_puts(gh_buf *buf, const char *string);
-extern int gh_buf_printf(gh_buf *buf, const char *format, ...)
+extern int strbuf_set(strbuf *buf, const unsigned char *data, int len);
+extern int strbuf_sets(strbuf *buf, const char *string);
+extern int strbuf_putc(strbuf *buf, int c);
+extern int strbuf_put(strbuf *buf, const unsigned char *data, int len);
+extern int strbuf_puts(strbuf *buf, const char *string);
+extern int strbuf_printf(strbuf *buf, const char *format, ...)
__attribute__((format (printf, 2, 3)));
-extern int gh_buf_vprintf(gh_buf *buf, const char *format, va_list ap);
-extern void gh_buf_clear(gh_buf *buf);
-
-int gh_buf_strchr(const gh_buf *buf, int c, int pos);
-int gh_buf_strrchr(const gh_buf *buf, int c, int pos);
-void gh_buf_drop(gh_buf *buf, int n);
-void gh_buf_truncate(gh_buf *buf, int len);
-void gh_buf_trim(gh_buf *buf);
+extern int strbuf_vprintf(strbuf *buf, const char *format, va_list ap);
+extern void strbuf_clear(strbuf *buf);
+
+int strbuf_strchr(const strbuf *buf, int c, int pos);
+int strbuf_strrchr(const strbuf *buf, int c, int pos);
+void strbuf_drop(strbuf *buf, int n);
+void strbuf_truncate(strbuf *buf, int len);
+void strbuf_trim(strbuf *buf);
#endif
diff --git a/src/chunk.h b/src/chunk.h
index f3841ed..f37a2f3 100644
--- a/src/chunk.h
+++ b/src/chunk.h
@@ -78,12 +78,12 @@ static inline chunk chunk_dup(const chunk *ch, int pos, int len)
return c;
}
-static inline chunk chunk_buf_detach(gh_buf *buf)
+static inline chunk chunk_buf_detach(strbuf *buf)
{
chunk c;
c.len = buf->size;
- c.data = gh_buf_detach(buf);
+ c.data = strbuf_detach(buf);
c.alloc = 1;
return c;
diff --git a/src/html/houdini.h b/src/html/houdini.h
index 31fe917..1e54d20 100644
--- a/src/html/houdini.h
+++ b/src/html/houdini.h
@@ -25,17 +25,17 @@ extern "C" {
#define HOUDINI_ESCAPED_SIZE(x) (((x) * 12) / 10)
#define HOUDINI_UNESCAPED_SIZE(x) (x)
-extern int houdini_escape_html(gh_buf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure);
-extern int houdini_unescape_html(gh_buf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_xml(gh_buf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_uri(gh_buf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_url(gh_buf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size);
-extern int houdini_unescape_uri(gh_buf *ob, const uint8_t *src, size_t size);
-extern int houdini_unescape_url(gh_buf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_js(gh_buf *ob, const uint8_t *src, size_t size);
-extern int houdini_unescape_js(gh_buf *ob, const uint8_t *src, size_t size);
+extern int houdini_escape_html(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_escape_html0(strbuf *ob, const uint8_t *src, size_t size, int secure);
+extern int houdini_unescape_html(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_escape_xml(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_escape_uri(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_escape_url(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_escape_href(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_unescape_uri(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_unescape_url(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_escape_js(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_unescape_js(strbuf *ob, const uint8_t *src, size_t size);
#ifdef __cplusplus
}
diff --git a/src/html/houdini_href_e.c b/src/html/houdini_href_e.c
index b2a7d79..12456ce 100644
--- a/src/html/houdini_href_e.c
+++ b/src/html/houdini_href_e.c
@@ -49,7 +49,7 @@ static const char HREF_SAFE[] = {
};
int
-houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size)
+houdini_escape_href(strbuf *ob, const uint8_t *src, size_t size)
{
static const uint8_t hex_chars[] = "0123456789ABCDEF";
size_t i = 0, org;
@@ -63,7 +63,7 @@ houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size)
i++;
if (likely(i > org))
- gh_buf_put(ob, src + org, i - org);
+ strbuf_put(ob, src + org, i - org);
/* escaping */
if (i >= size)
@@ -73,14 +73,14 @@ houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size)
/* amp appears all the time in URLs, but needs
* HTML-entity escaping to be inside an href */
case '&':
- gh_buf_puts(ob, "&amp;");
+ strbuf_puts(ob, "&amp;");
break;
/* the single quote is a valid URL character
* according to the standard; it needs HTML
* entity escaping too */
case '\'':
- gh_buf_puts(ob, "&#x27;");
+ strbuf_puts(ob, "&#x27;");
break;
/* the space can be escaped to %20 or a plus
@@ -89,7 +89,7 @@ houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size)
* when building GET strings */
#if 0
case ' ':
- gh_buf_putc(ob, '+');
+ strbuf_putc(ob, '+');
break;
#endif
@@ -97,7 +97,7 @@ houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size)
default:
hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
hex_str[2] = hex_chars[src[i] & 0xF];
- gh_buf_put(ob, hex_str, 3);
+ strbuf_put(ob, hex_str, 3);
}
i++;
diff --git a/src/html/houdini_html_e.c b/src/html/houdini_html_e.c
index 95b6c41..f2e86fe 100644
--- a/src/html/houdini_html_e.c
+++ b/src/html/houdini_html_e.c
@@ -45,7 +45,7 @@ static const char *HTML_ESCAPES[] = {
};
int
-houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure)
+houdini_escape_html0(strbuf *ob, const uint8_t *src, size_t size, int secure)
{
size_t i = 0, org, esc = 0;
@@ -55,7 +55,7 @@ houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure)
i++;
if (i > org)
- gh_buf_put(ob, src + org, i - org);
+ strbuf_put(ob, src + org, i - org);
/* escaping */
if (unlikely(i >= size))
@@ -63,9 +63,9 @@ houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure)
/* The forward slash is only escaped in secure mode */
if ((src[i] == '/' || src[i] == '\'') && !secure) {
- gh_buf_putc(ob, src[i]);
+ strbuf_putc(ob, src[i]);
} else {
- gh_buf_puts(ob, HTML_ESCAPES[esc]);
+ strbuf_puts(ob, HTML_ESCAPES[esc]);
}
i++;
@@ -75,7 +75,7 @@ houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure)
}
int
-houdini_escape_html(gh_buf *ob, const uint8_t *src, size_t size)
+houdini_escape_html(strbuf *ob, const uint8_t *src, size_t size)
{
return houdini_escape_html0(ob, src, size, 1);
}
diff --git a/src/html/html.c b/src/html/html.c
index 41b8fda..a9356dd 100644
--- a/src/html/html.c
+++ b/src/html/html.c
@@ -10,7 +10,7 @@
// Functions to convert block and inline lists to HTML strings.
-static void escape_html(gh_buf *dest, const unsigned char *source, int length)
+static void escape_html(strbuf *dest, const unsigned char *source, int length)
{
if (length < 0)
length = strlen((char *)source);
@@ -18,7 +18,7 @@ static void escape_html(gh_buf *dest, const unsigned char *source, int length)
houdini_escape_html0(dest, source, (size_t)length, 0);
}
-static void escape_href(gh_buf *dest, const unsigned char *source, int length)
+static void escape_href(strbuf *dest, const unsigned char *source, int length)
{
if (length < 0)
length = strlen((char *)source);
@@ -26,14 +26,14 @@ static void escape_href(gh_buf *dest, const unsigned char *source, int length)
houdini_escape_href(dest, source, (size_t)length);
}
-static inline void cr(gh_buf *html)
+static inline void cr(strbuf *html)
{
if (html->size && html->ptr[html->size - 1] != '\n')
- gh_buf_putc(html, '\n');
+ strbuf_putc(html, '\n');
}
// Convert a block list to HTML. Returns 0 on success, and sets result.
-void blocks_to_html(gh_buf *html, block *b, bool tight)
+void blocks_to_html(strbuf *html, block *b, bool tight)
{
struct ListData *data;
@@ -48,25 +48,25 @@ void blocks_to_html(gh_buf *html, block *b, bool tight)
inlines_to_html(html, b->inline_content);
} else {
cr(html);
- gh_buf_puts(html, "<p>");
+ strbuf_puts(html, "<p>");
inlines_to_html(html, b->inline_content);
- gh_buf_puts(html, "</p>\n");
+ strbuf_puts(html, "</p>\n");
}
break;
case block_quote:
cr(html);
- gh_buf_puts(html, "<blockquote>\n");
+ strbuf_puts(html, "<blockquote>\n");
blocks_to_html(html, b->children, false);
- gh_buf_puts(html, "</blockquote>\n");
+ strbuf_puts(html, "</blockquote>\n");
break;
case list_item:
cr(html);
- gh_buf_puts(html, "<li>");
+ strbuf_puts(html, "<li>");
blocks_to_html(html, b->children, tight);
- gh_buf_trim(html); /* TODO: rtrim */
- gh_buf_puts(html, "</li>\n");
+ strbuf_trim(html); /* TODO: rtrim */
+ strbuf_puts(html, "</li>\n");
break;
case list:
@@ -75,58 +75,58 @@ void blocks_to_html(gh_buf *html, block *b, bool tight)
data = &(b->attributes.list_data);
if (data->start > 1) {
- gh_buf_printf(html, "<%s start=\"%d\">\n",
+ strbuf_printf(html, "<%s start=\"%d\">\n",
data->list_type == bullet ? "ul" : "ol",
data->start);
} else {
- gh_buf_puts(html, data->list_type == bullet ? "<ul>\n" : "<ol>\n");
+ strbuf_puts(html, data->list_type == bullet ? "<ul>\n" : "<ol>\n");
}
blocks_to_html(html, b->children, data->tight);
- gh_buf_puts(html, data->list_type == bullet ? "</ul>" : "</ol>");
- gh_buf_putc(html, '\n');
+ strbuf_puts(html, data->list_type == bullet ? "</ul>" : "</ol>");
+ strbuf_putc(html, '\n');
break;
case atx_header:
case setext_header:
cr(html);
- gh_buf_printf(html, "<h%d>", b->attributes.header_level);
+ strbuf_printf(html, "<h%d>", b->attributes.header_level);
inlines_to_html(html, b->inline_content);
- gh_buf_printf(html, "</h%d>\n", b->attributes.header_level);
+ strbuf_printf(html, "</h%d>\n", b->attributes.header_level);
break;
case indented_code:
case fenced_code:
cr(html);
- gh_buf_puts(html, "<pre><code");
+ strbuf_puts(html, "<pre><code");
if (b->tag == fenced_code) {
- gh_buf *info = &b->attributes.fenced_code_data.info;
+ strbuf *info = &b->attributes.fenced_code_data.info;
- if (gh_buf_len(info) > 0) {
- int first_tag = gh_buf_strchr(info, ' ', 0);
+ if (strbuf_len(info) > 0) {
+ int first_tag = strbuf_strchr(info, ' ', 0);
if (first_tag < 0)
- first_tag = gh_buf_len(info);
+ first_tag = strbuf_len(info);
- gh_buf_puts(html, " class=\"");
+ strbuf_puts(html, " class=\"");
escape_html(html, info->ptr, first_tag);
- gh_buf_putc(html, '"');
+ strbuf_putc(html, '"');
}
}
- gh_buf_puts(html, ">");
+ strbuf_puts(html, ">");
escape_html(html, b->string_content.ptr, b->string_content.size);
- gh_buf_puts(html, "</code></pre>\n");
+ strbuf_puts(html, "</code></pre>\n");
break;
case html_block:
- gh_buf_put(html, b->string_content.ptr, b->string_content.size);
+ strbuf_put(html, b->string_content.ptr, b->string_content.size);
break;
case hrule:
- gh_buf_puts(html, "<hr />\n");
+ strbuf_puts(html, "<hr />\n");
break;
case reference_def:
@@ -141,9 +141,9 @@ void blocks_to_html(gh_buf *html, block *b, bool tight)
}
// Convert an inline list to HTML. Returns 0 on success, and sets result.
-void inlines_to_html(gh_buf *html, inl* ils)
+void inlines_to_html(strbuf *html, inl* ils)
{
- gh_buf scrap = GH_BUF_INIT;
+ strbuf scrap = GH_BUF_INIT;
while(ils != NULL) {
switch(ils->tag) {
@@ -152,70 +152,70 @@ void inlines_to_html(gh_buf *html, inl* ils)
break;
case INL_LINEBREAK:
- gh_buf_puts(html, "<br />\n");
+ strbuf_puts(html, "<br />\n");
break;
case INL_SOFTBREAK:
- gh_buf_putc(html, '\n');
+ strbuf_putc(html, '\n');
break;
case INL_CODE:
- gh_buf_puts(html, "<code>");
+ strbuf_puts(html, "<code>");
escape_html(html, ils->content.literal.data, ils->content.literal.len);
- gh_buf_puts(html, "</code>");
+ strbuf_puts(html, "</code>");
break;
case INL_RAW_HTML:
case INL_ENTITY:
- gh_buf_put(html,
+ strbuf_put(html,
ils->content.literal.data,
ils->content.literal.len);
break;
case INL_LINK:
- gh_buf_puts(html, "<a href=\"");
+ strbuf_puts(html, "<a href=\"");
if (ils->content.linkable.url)
escape_href(html, ils->content.linkable.url, -1);
if (ils->content.linkable.title) {
- gh_buf_puts(html, "\" title=\"");
+ strbuf_puts(html, "\" title=\"");
escape_html(html, ils->content.linkable.title, -1);
}
- gh_buf_puts(html, "\">");
+ strbuf_puts(html, "\">");
inlines_to_html(html, ils->content.inlines);
- gh_buf_puts(html, "</a>");
+ strbuf_puts(html, "</a>");
break;
case INL_IMAGE:
- gh_buf_puts(html, "<img src=\"");
+ strbuf_puts(html, "<img src=\"");
if (ils->content.linkable.url)
escape_href(html, ils->content.linkable.url, -1);
inlines_to_html(&scrap, ils->content.inlines);
- gh_buf_puts(html, "\" alt=\"");
+ strbuf_puts(html, "\" alt=\"");
if (scrap.size)
escape_html(html, scrap.ptr, scrap.size);
- gh_buf_clear(&scrap);
+ strbuf_clear(&scrap);
if (ils->content.linkable.title) {
- gh_buf_puts(html, "\" title=\"");
+ strbuf_puts(html, "\" title=\"");
escape_html(html, ils->content.linkable.title, -1);
}
- gh_buf_puts(html, "\"/>");
+ strbuf_puts(html, "\"/>");
break;
case INL_STRONG:
- gh_buf_puts(html, "<strong>");
+ strbuf_puts(html, "<strong>");
inlines_to_html(html, ils->content.inlines);
- gh_buf_puts(html, "</strong>");
+ strbuf_puts(html, "</strong>");
break;
case INL_EMPH:
- gh_buf_puts(html, "<em>");
+ strbuf_puts(html, "<em>");
inlines_to_html(html, ils->content.inlines);
- gh_buf_puts(html, "</em>");
+ strbuf_puts(html, "</em>");
break;
}
ils = ils->next;
diff --git a/src/inlines.c b/src/inlines.c
index 8e2e683..33973df 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -25,7 +25,7 @@ inline static void chunk_free(chunk *c);
inline static void chunk_trim(chunk *c);
inline static chunk chunk_literal(const char *data);
-inline static chunk chunk_buf_detach(gh_buf *buf);
+inline static chunk chunk_buf_detach(strbuf *buf);
inline static chunk chunk_dup(const chunk *ch, int pos, int len);
static inl *parse_chunk_inlines(chunk *chunk, reference** refmap);
@@ -33,10 +33,10 @@ static inl *parse_inlines_while(subject* subj, int (*f)(subject*));
static int parse_inline(subject* subj, inl ** last);
static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap);
-static void subject_from_buf(subject *e, gh_buf *buffer, reference** refmap);
+static void subject_from_buf(subject *e, strbuf *buffer, reference** refmap);
static int subject_find_special_char(subject *subj);
-static void normalize_whitespace(gh_buf *s);
+static void normalize_whitespace(strbuf *s);
extern void free_reference(reference *ref) {
free(ref->label);
@@ -62,13 +62,13 @@ extern void free_reference_map(reference **refmap) {
// remove leading/trailing whitespace, case fold
static unsigned char *normalize_reference(chunk *ref)
{
- gh_buf normalized = GH_BUF_INIT;
+ strbuf normalized = GH_BUF_INIT;
utf8proc_case_fold(&normalized, ref->data, ref->len);
- gh_buf_trim(&normalized);
+ strbuf_trim(&normalized);
normalize_whitespace(&normalized);
- return gh_buf_detach(&normalized);
+ return strbuf_detach(&normalized);
}
// Returns reference if refmap contains a reference with matching
@@ -218,7 +218,7 @@ inline static inl* append_inlines(inl* a, inl* b)
return a;
}
-static void subject_from_buf(subject *e, gh_buf *buffer, reference** refmap)
+static void subject_from_buf(subject *e, strbuf *buffer, reference** refmap)
{
e->input.data = buffer->ptr;
e->input.len = buffer->size;
@@ -309,7 +309,7 @@ static int scan_to_closing_backticks(subject* subj, int openticklength)
// Destructively modify string, collapsing consecutive
// space and newline characters into a single space.
-static void normalize_whitespace(gh_buf *s)
+static void normalize_whitespace(strbuf *s)
{
bool last_char_was_space = false;
int r, w;
@@ -331,7 +331,7 @@ static void normalize_whitespace(gh_buf *s)
}
}
- gh_buf_truncate(s, w);
+ strbuf_truncate(s, w);
}
// Parse backtick code section or raw backticks, return an inline.
@@ -346,10 +346,10 @@ static inl* handle_backticks(subject *subj)
subj->pos = startpos; // rewind
return make_str(openticks);
} else {
- gh_buf buf = GH_BUF_INIT;
+ strbuf buf = GH_BUF_INIT;
- gh_buf_set(&buf, subj->input.data + startpos, endpos - startpos - openticks.len);
- gh_buf_trim(&buf);
+ strbuf_set(&buf, subj->input.data + startpos, endpos - startpos - openticks.len);
+ strbuf_trim(&buf);
normalize_whitespace(&buf);
return make_code(chunk_buf_detach(&buf));
@@ -569,7 +569,7 @@ static inl *make_str_with_entities(chunk *content)
}
// Destructively unescape a string: remove backslashes before punctuation chars.
-extern void unescape_buffer(gh_buf *buf)
+extern void unescape_buffer(strbuf *buf)
{
int r, w;
@@ -580,14 +580,14 @@ extern void unescape_buffer(gh_buf *buf)
buf->ptr[w++] = buf->ptr[r];
}
- gh_buf_truncate(buf, w);
+ strbuf_truncate(buf, w);
}
// Clean a URL: remove surrounding whitespace and surrounding <>,
// and remove \ that escape punctuation.
static unsigned char *clean_url(chunk *url, int is_email)
{
- gh_buf buf = GH_BUF_INIT;
+ strbuf buf = GH_BUF_INIT;
chunk_trim(url);
@@ -595,22 +595,22 @@ static unsigned char *clean_url(chunk *url, int is_email)
return NULL;
if (is_email)
- gh_buf_puts(&buf, "mailto:");
+ strbuf_puts(&buf, "mailto:");
if (url->data[0] == '<' && url->data[url->len - 1] == '>') {
- gh_buf_put(&buf, url->data + 1, url->len - 2);
+ strbuf_put(&buf, url->data + 1, url->len - 2);
} else {
- gh_buf_put(&buf, url->data, url->len);
+ strbuf_put(&buf, url->data, url->len);
}
unescape_buffer(&buf);
- return gh_buf_detach(&buf);
+ return strbuf_detach(&buf);
}
// Clean a title: remove surrounding quotes and remove \ that escape punctuation.
static unsigned char *clean_title(chunk *title)
{
- gh_buf buf = GH_BUF_INIT;
+ strbuf buf = GH_BUF_INIT;
unsigned char first, last;
if (title->len == 0)
@@ -623,13 +623,13 @@ static unsigned char *clean_title(chunk *title)
if ((first == '\'' && last == '\'') ||
(first == '(' && last == ')') ||
(first == '"' && last == '"')) {
- gh_buf_set(&buf, title->data + 1, title->len - 2);
+ strbuf_set(&buf, title->data + 1, title->len - 2);
} else {
- gh_buf_set(&buf, title->data, title->len);
+ strbuf_set(&buf, title->data, title->len);
}
unescape_buffer(&buf);
- return gh_buf_detach(&buf);
+ return strbuf_detach(&buf);
}
// Parse an autolink or HTML tag.
@@ -971,7 +971,7 @@ static int parse_inline(subject* subj, inl ** last)
return 1;
}
-extern inl* parse_inlines(gh_buf *input, reference** refmap)
+extern inl* parse_inlines(strbuf *input, reference** refmap)
{
subject subj;
subject_from_buf(&subj, input, refmap);
@@ -993,7 +993,7 @@ void spnl(subject* subj)
// Modify refmap if a reference is encountered.
// Return 0 if no reference found, otherwise position of subject
// after reference is parsed.
-extern int parse_reference(gh_buf *input, reference** refmap)
+extern int parse_reference(strbuf *input, reference** refmap)
{
subject subj;
diff --git a/src/main.c b/src/main.c
index e1abedc..7cf67e2 100644
--- a/src/main.c
+++ b/src/main.c
@@ -14,14 +14,14 @@ void print_usage()
static void print_document(block *document, bool ast)
{
- gh_buf html = GH_BUF_INIT;
+ strbuf html = GH_BUF_INIT;
if (ast) {
print_blocks(document, 0);
} else {
blocks_to_html(&html, document, false);
printf("%s", html.ptr);
- gh_buf_free(&html);
+ strbuf_free(&html);
}
}
diff --git a/src/stmd.h b/src/stmd.h
index 4a3c399..2e86f3a 100644
--- a/src/stmd.h
+++ b/src/stmd.h
@@ -50,7 +50,7 @@ struct FencedCodeData {
int fence_length;
int fence_offset;
char fence_char;
- gh_buf info;
+ strbuf info;
};
typedef struct Block {
@@ -76,7 +76,7 @@ typedef struct Block {
struct Block* last_child;
struct Block* parent;
struct Block* top;
- gh_buf string_content;
+ strbuf string_content;
inl* inline_content;
union {
struct ListData list_data;
@@ -88,15 +88,15 @@ typedef struct Block {
struct Block * prev;
} block;
-inl* parse_inlines(gh_buf *input, reference** refmap);
+inl* parse_inlines(strbuf *input, reference** refmap);
void free_inlines(inl* e);
-int parse_reference(gh_buf *input, reference** refmap);
+int parse_reference(strbuf *input, reference** refmap);
void free_reference(reference *ref);
void free_reference_map(reference **refmap);
void add_reference(reference** refmap, reference* ref);
-void unescape_buffer(gh_buf *buf);
+void unescape_buffer(strbuf *buf);
extern block* make_document();
extern block* add_child(block* parent,
@@ -109,9 +109,9 @@ extern block *stmd_parse_file(FILE *f);
void print_inlines(inl* ils, int indent);
void print_blocks(block* blk, int indent);
-void blocks_to_html(gh_buf *html, block *b, bool tight);
-void inlines_to_html(gh_buf *html, inl *b);
+void blocks_to_html(strbuf *html, block *b, bool tight);
+void inlines_to_html(strbuf *html, inl *b);
-void utf8proc_case_fold(gh_buf *dest, const unsigned char *str, int len);
+void utf8proc_case_fold(strbuf *dest, const unsigned char *str, int len);
#endif
diff --git a/src/utf8.c b/src/utf8.c
index 32c78a4..cebd872 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -84,7 +84,7 @@ ssize_t utf8proc_iterate(const uint8_t *str, ssize_t str_len, int32_t *dst)
return length;
}
-void utf8proc_encode_char(int32_t uc, gh_buf *buf)
+void utf8proc_encode_char(int32_t uc, strbuf *buf)
{
unsigned char dst[4];
int len = 0;
@@ -119,10 +119,10 @@ void utf8proc_encode_char(int32_t uc, gh_buf *buf)
assert(false);
}
- gh_buf_put(buf, dst, len);
+ strbuf_put(buf, dst, len);
}
-void utf8proc_case_fold(gh_buf *dest, const unsigned char *str, int len)
+void utf8proc_case_fold(strbuf *dest, const unsigned char *str, int len)
{
int32_t c;
--
cgit v1.2.3
From 647b15968c95ec268d6d728eea73756c7ba648a8 Mon Sep 17 00:00:00 2001
From: Vicent Marti
Date: Thu, 4 Sep 2014 18:42:49 +0200
Subject: Rename inl
---
src/html/html.c | 2 +-
src/inlines.c | 88 ++++++++++++++++++++++++++++-----------------------------
src/print.c | 2 +-
src/stmd.h | 34 ++++++++++++++--------
4 files changed, 68 insertions(+), 58 deletions(-)
(limited to 'src/inlines.c')
diff --git a/src/html/html.c b/src/html/html.c
index a9356dd..53521b8 100644
--- a/src/html/html.c
+++ b/src/html/html.c
@@ -141,7 +141,7 @@ void blocks_to_html(strbuf *html, block *b, bool tight)
}
// Convert an inline list to HTML. Returns 0 on success, and sets result.
-void inlines_to_html(strbuf *html, inl* ils)
+void inlines_to_html(strbuf *html, struct inl* ils)
{
strbuf scrap = GH_BUF_INIT;
diff --git a/src/inlines.c b/src/inlines.c
index 33973df..301125e 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -28,9 +28,9 @@ inline static chunk chunk_literal(const char *data);
inline static chunk chunk_buf_detach(strbuf *buf);
inline static chunk chunk_dup(const chunk *ch, int pos, int len);
-static inl *parse_chunk_inlines(chunk *chunk, reference** refmap);
-static inl *parse_inlines_while(subject* subj, int (*f)(subject*));
-static int parse_inline(subject* subj, inl ** last);
+static struct inl *parse_chunk_inlines(chunk *chunk, reference** refmap);
+static struct inl *parse_inlines_while(subject* subj, int (*f)(subject*));
+static int parse_inline(subject* subj, struct inl ** last);
static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap);
static void subject_from_buf(subject *e, strbuf *buffer, reference** refmap);
@@ -108,9 +108,9 @@ extern void add_reference(reference** refmap, reference* ref)
}
}
-inline static inl* make_link_from_reference(inl* label, reference *ref)
+inline static struct inl* make_link_from_reference(struct inl* label, reference *ref)
{
- inl* e = (inl*) malloc(sizeof(inl));
+ struct inl* e = (struct inl*) malloc(sizeof(struct inl));
e->tag = INL_LINK;
e->content.linkable.label = label;
e->content.linkable.url = strdup(ref->url);
@@ -120,9 +120,9 @@ inline static inl* make_link_from_reference(inl* label, reference *ref)
}
// Create an inline with a linkable string value.
-inline static inl* make_link(inl* label, chunk url, chunk title, int is_email)
+inline static struct inl* make_link(struct inl* label, chunk url, chunk title, int is_email)
{
- inl* e = (inl*) malloc(sizeof(inl));
+ struct inl* e = (struct inl*) malloc(sizeof(struct inl));
e->tag = INL_LINK;
e->content.linkable.label = label;
e->content.linkable.url = clean_url(&url, is_email);
@@ -131,9 +131,9 @@ inline static inl* make_link(inl* label, chunk url, chunk title, int is_email)
return e;
}
-inline static inl* make_inlines(int t, inl* contents)
+inline static struct inl* make_inlines(int t, struct inl* contents)
{
- inl* e = (inl*) malloc(sizeof(inl));
+ struct inl* e = (struct inl*) malloc(sizeof(struct inl));
e->tag = t;
e->content.inlines = contents;
e->next = NULL;
@@ -141,9 +141,9 @@ inline static inl* make_inlines(int t, inl* contents)
}
// Create an inline with a literal string value.
-inline static inl* make_literal(int t, chunk s)
+inline static struct inl* make_literal(int t, chunk s)
{
- inl* e = (inl*) malloc(sizeof(inl));
+ struct inl* e = (struct inl*) malloc(sizeof(struct inl));
e->tag = t;
e->content.literal = s;
e->next = NULL;
@@ -151,9 +151,9 @@ inline static inl* make_literal(int t, chunk s)
}
// Create an inline with no value.
-inline static inl* make_simple(int t)
+inline static struct inl* make_simple(int t)
{
- inl* e = (inl*) malloc(sizeof(inl));
+ struct inl* e = (struct inl*) malloc(sizeof(struct inl));
e->tag = t;
e->next = NULL;
return e;
@@ -170,9 +170,9 @@ inline static inl* make_simple(int t)
#define make_strong(contents) make_inlines(INL_STRONG, contents)
// Free an inline list.
-extern void free_inlines(inl* e)
+extern void free_inlines(struct inl* e)
{
- inl * next;
+ struct inl * next;
while (e != NULL) {
switch (e->tag){
case INL_STRING:
@@ -205,12 +205,12 @@ extern void free_inlines(inl* e)
// Append inline list b to the end of inline list a.
// Return pointer to head of new list.
-inline static inl* append_inlines(inl* a, inl* b)
+inline static struct inl* append_inlines(struct inl* a, struct inl* b)
{
if (a == NULL) { // NULL acts like an empty list
return b;
}
- inl* cur = a;
+ struct inl* cur = a;
while (cur->next) {
cur = cur->next;
}
@@ -336,7 +336,7 @@ static void normalize_whitespace(strbuf *s)
// Parse backtick code section or raw backticks, return an inline.
// Assumes that the subject has a backtick at the current position.
-static inl* handle_backticks(subject *subj)
+static struct inl* handle_backticks(subject *subj)
{
chunk openticks = take_while(subj, isbacktick);
int startpos = subj->pos;
@@ -382,15 +382,15 @@ static int scan_delims(subject* subj, char c, bool * can_open, bool * can_close)
// Parse strong/emph or a fallback.
// Assumes the subject has '_' or '*' at the current position.
-static inl* handle_strong_emph(subject* subj, char c)
+static struct inl* handle_strong_emph(subject* subj, char c)
{
bool can_open, can_close;
- inl * result = NULL;
- inl ** last = malloc(sizeof(inl *));
- inl * new;
- inl * il;
- inl * first_head = NULL;
- inl * first_close = NULL;
+ struct inl * result = NULL;
+ struct inl ** last = malloc(sizeof(struct inl *));
+ struct inl * new;
+ struct inl * il;
+ struct inl * first_head = NULL;
+ struct inl * first_close = NULL;
int first_close_delims = 0;
int numdelims;
@@ -508,7 +508,7 @@ done:
}
// Parse backslash-escape or just a backslash, returning an inline.
-static inl* handle_backslash(subject *subj)
+static struct inl* handle_backslash(subject *subj)
{
advance(subj);
unsigned char nextchar = peek_char(subj);
@@ -525,10 +525,10 @@ static inl* handle_backslash(subject *subj)
// Parse an entity or a regular "&" string.
// Assumes the subject has an '&' character at the current position.
-static inl* handle_entity(subject* subj)
+static struct inl* handle_entity(subject* subj)
{
int match;
- inl *result;
+ struct inl *result;
match = scan_entity(&subj->input, subj->pos);
if (match) {
result = make_entity(chunk_dup(&subj->input, subj->pos, match));
@@ -542,10 +542,10 @@ static inl* handle_entity(subject* subj)
// Like make_str, but parses entities.
// Returns an inline sequence consisting of str and entity elements.
-static inl *make_str_with_entities(chunk *content)
+static struct inl *make_str_with_entities(chunk *content)
{
- inl *result = NULL;
- inl *new;
+ struct inl *result = NULL;
+ struct inl *new;
int searchpos;
char c;
subject subj;
@@ -634,7 +634,7 @@ static unsigned char *clean_title(chunk *title)
// Parse an autolink or HTML tag.
// Assumes the subject has a '<' character at the current position.
-static inl* handle_pointy_brace(subject* subj)
+static struct inl* handle_pointy_brace(subject* subj)
{
int matchlen = 0;
chunk contents;
@@ -693,7 +693,7 @@ static inl* handle_pointy_brace(subject* subj)
static int link_label(subject* subj, chunk *raw_label)
{
int nestlevel = 0;
- inl* tmp = NULL;
+ struct inl* tmp = NULL;
int startpos = subj->pos;
if (subj->label_nestlevel) {
@@ -751,10 +751,10 @@ static int link_label(subject* subj, chunk *raw_label)
}
// Parse a link or the link portion of an image, or return a fallback.
-static inl* handle_left_bracket(subject* subj)
+static struct inl* handle_left_bracket(subject* subj)
{
- inl *lab = NULL;
- inl *result = NULL;
+ struct inl *lab = NULL;
+ struct inl *result = NULL;
reference *ref;
int n;
int sps;
@@ -838,7 +838,7 @@ static inl* handle_left_bracket(subject* subj)
// Parse a hard or soft linebreak, returning an inline.
// Assumes the subject has a newline at the current position.
-static inl* handle_newline(subject *subj)
+static struct inl* handle_newline(subject *subj)
{
int nlpos = subj->pos;
// skip over newline
@@ -862,16 +862,16 @@ inline static int not_eof(subject* subj)
}
// Parse inlines while a predicate is satisfied. Return inlines.
-extern inl* parse_inlines_while(subject* subj, int (*f)(subject*))
+extern struct inl* parse_inlines_while(subject* subj, int (*f)(subject*))
{
- inl* result = NULL;
- inl** last = &result;
+ struct inl* result = NULL;
+ struct inl** last = &result;
while ((*f)(subj) && parse_inline(subj, last)) {
}
return result;
}
-inl *parse_chunk_inlines(chunk *chunk, reference** refmap)
+struct inl *parse_chunk_inlines(chunk *chunk, reference** refmap)
{
subject subj;
subject_from_chunk(&subj, chunk, refmap);
@@ -894,9 +894,9 @@ static int subject_find_special_char(subject *subj)
// Parse an inline, advancing subject, and add it to last element.
// Adjust tail to point to new last element of list.
// Return 0 if no inline can be parsed, 1 otherwise.
-static int parse_inline(subject* subj, inl ** last)
+static int parse_inline(subject* subj, struct inl ** last)
{
- inl* new = NULL;
+ struct inl* new = NULL;
chunk contents;
unsigned char c;
int endpos;
@@ -971,7 +971,7 @@ static int parse_inline(subject* subj, inl ** last)
return 1;
}
-extern inl* parse_inlines(strbuf *input, reference** refmap)
+extern struct inl* parse_inlines(strbuf *input, reference** refmap)
{
subject subj;
subject_from_buf(&subj, input, refmap);
diff --git a/src/print.c b/src/print.c
index 832ad4f..63f63c8 100644
--- a/src/print.c
+++ b/src/print.c
@@ -117,7 +117,7 @@ extern void print_blocks(block* b, int indent)
}
// Prettyprint an inline list, for debugging.
-extern void print_inlines(inl* ils, int indent)
+extern void print_inlines(struct inl* ils, int indent)
{
while(ils != NULL) {
for (int i=0; i < indent; i++) {
diff --git a/src/stmd.h b/src/stmd.h
index 2e86f3a..9ed33ec 100644
--- a/src/stmd.h
+++ b/src/stmd.h
@@ -10,20 +10,30 @@
#define VERSION "0.1"
#define CODE_INDENT 4
-typedef struct Inline {
- enum { INL_STRING, INL_SOFTBREAK, INL_LINEBREAK, INL_CODE, INL_RAW_HTML, INL_ENTITY,
- INL_EMPH, INL_STRONG, INL_LINK, INL_IMAGE } tag;
+struct inl {
+ enum {
+ INL_STRING,
+ INL_SOFTBREAK,
+ INL_LINEBREAK,
+ INL_CODE,
+ INL_RAW_HTML,
+ INL_ENTITY,
+ INL_EMPH,
+ INL_STRONG,
+ INL_LINK,
+ INL_IMAGE
+ } tag;
union {
chunk literal;
- struct Inline *inlines;
+ struct inl *inlines;
struct {
- struct Inline *label;
+ struct inl *label;
unsigned char *url;
unsigned char *title;
} linkable;
} content;
- struct Inline *next;
-} inl;
+ struct inl *next;
+};
typedef struct Reference {
unsigned char *label;
@@ -77,7 +87,7 @@ typedef struct Block {
struct Block* parent;
struct Block* top;
strbuf string_content;
- inl* inline_content;
+ struct inl* inline_content;
union {
struct ListData list_data;
struct FencedCodeData fenced_code_data;
@@ -88,8 +98,8 @@ typedef struct Block {
struct Block * prev;
} block;
-inl* parse_inlines(strbuf *input, reference** refmap);
-void free_inlines(inl* e);
+struct inl* parse_inlines(strbuf *input, reference** refmap);
+void free_inlines(struct inl* e);
int parse_reference(strbuf *input, reference** refmap);
void free_reference(reference *ref);
@@ -106,11 +116,11 @@ void free_blocks(block* e);
extern block *stmd_parse_document(const unsigned char *buffer, size_t len);
extern block *stmd_parse_file(FILE *f);
-void print_inlines(inl* ils, int indent);
+void print_inlines(struct inl* ils, int indent);
void print_blocks(block* blk, int indent);
void blocks_to_html(strbuf *html, block *b, bool tight);
-void inlines_to_html(strbuf *html, inl *b);
+void inlines_to_html(strbuf *html, struct inl *b);
void utf8proc_case_fold(strbuf *dest, const unsigned char *str, int len);
--
cgit v1.2.3
From 9e4855365b920c2a80b0f1ab6937280f0b504334 Mon Sep 17 00:00:00 2001
From: Vicent Marti
Date: Thu, 4 Sep 2014 18:45:44 +0200
Subject: Rename `inl`
---
src/html/html.c | 2 +-
src/inlines.c | 88 ++++++++++++++++++++++++++++-----------------------------
src/print.c | 2 +-
src/stmd.h | 28 ++++++++++--------
4 files changed, 62 insertions(+), 58 deletions(-)
(limited to 'src/inlines.c')
diff --git a/src/html/html.c b/src/html/html.c
index 53521b8..a7bb21a 100644
--- a/src/html/html.c
+++ b/src/html/html.c
@@ -141,7 +141,7 @@ void blocks_to_html(strbuf *html, block *b, bool tight)
}
// Convert an inline list to HTML. Returns 0 on success, and sets result.
-void inlines_to_html(strbuf *html, struct inl* ils)
+void inlines_to_html(strbuf *html, node_inl* ils)
{
strbuf scrap = GH_BUF_INIT;
diff --git a/src/inlines.c b/src/inlines.c
index 301125e..6bb89da 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -28,9 +28,9 @@ inline static chunk chunk_literal(const char *data);
inline static chunk chunk_buf_detach(strbuf *buf);
inline static chunk chunk_dup(const chunk *ch, int pos, int len);
-static struct inl *parse_chunk_inlines(chunk *chunk, reference** refmap);
-static struct inl *parse_inlines_while(subject* subj, int (*f)(subject*));
-static int parse_inline(subject* subj, struct inl ** last);
+static node_inl *parse_chunk_inlines(chunk *chunk, reference** refmap);
+static node_inl *parse_inlines_while(subject* subj, int (*f)(subject*));
+static int parse_inline(subject* subj, node_inl ** last);
static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap);
static void subject_from_buf(subject *e, strbuf *buffer, reference** refmap);
@@ -108,9 +108,9 @@ extern void add_reference(reference** refmap, reference* ref)
}
}
-inline static struct inl* make_link_from_reference(struct inl* label, reference *ref)
+inline static node_inl* make_link_from_reference(node_inl* label, reference *ref)
{
- struct inl* e = (struct inl*) malloc(sizeof(struct inl));
+ node_inl* e = (node_inl*) malloc(sizeof(node_inl));
e->tag = INL_LINK;
e->content.linkable.label = label;
e->content.linkable.url = strdup(ref->url);
@@ -120,9 +120,9 @@ inline static struct inl* make_link_from_reference(struct inl* label, reference
}
// Create an inline with a linkable string value.
-inline static struct inl* make_link(struct inl* label, chunk url, chunk title, int is_email)
+inline static node_inl* make_link(node_inl* label, chunk url, chunk title, int is_email)
{
- struct inl* e = (struct inl*) malloc(sizeof(struct inl));
+ node_inl* e = (node_inl*) malloc(sizeof(node_inl));
e->tag = INL_LINK;
e->content.linkable.label = label;
e->content.linkable.url = clean_url(&url, is_email);
@@ -131,9 +131,9 @@ inline static struct inl* make_link(struct inl* label, chunk url, chunk title, i
return e;
}
-inline static struct inl* make_inlines(int t, struct inl* contents)
+inline static node_inl* make_inlines(int t, node_inl* contents)
{
- struct inl* e = (struct inl*) malloc(sizeof(struct inl));
+ node_inl* e = (node_inl*) malloc(sizeof(node_inl));
e->tag = t;
e->content.inlines = contents;
e->next = NULL;
@@ -141,9 +141,9 @@ inline static struct inl* make_inlines(int t, struct inl* contents)
}
// Create an inline with a literal string value.
-inline static struct inl* make_literal(int t, chunk s)
+inline static node_inl* make_literal(int t, chunk s)
{
- struct inl* e = (struct inl*) malloc(sizeof(struct inl));
+ node_inl* e = (node_inl*) malloc(sizeof(node_inl));
e->tag = t;
e->content.literal = s;
e->next = NULL;
@@ -151,9 +151,9 @@ inline static struct inl* make_literal(int t, chunk s)
}
// Create an inline with no value.
-inline static struct inl* make_simple(int t)
+inline static node_inl* make_simple(int t)
{
- struct inl* e = (struct inl*) malloc(sizeof(struct inl));
+ node_inl* e = (node_inl*) malloc(sizeof(node_inl));
e->tag = t;
e->next = NULL;
return e;
@@ -170,9 +170,9 @@ inline static struct inl* make_simple(int t)
#define make_strong(contents) make_inlines(INL_STRONG, contents)
// Free an inline list.
-extern void free_inlines(struct inl* e)
+extern void free_inlines(node_inl* e)
{
- struct inl * next;
+ node_inl * next;
while (e != NULL) {
switch (e->tag){
case INL_STRING:
@@ -205,12 +205,12 @@ extern void free_inlines(struct inl* e)
// Append inline list b to the end of inline list a.
// Return pointer to head of new list.
-inline static struct inl* append_inlines(struct inl* a, struct inl* b)
+inline static node_inl* append_inlines(node_inl* a, node_inl* b)
{
if (a == NULL) { // NULL acts like an empty list
return b;
}
- struct inl* cur = a;
+ node_inl* cur = a;
while (cur->next) {
cur = cur->next;
}
@@ -336,7 +336,7 @@ static void normalize_whitespace(strbuf *s)
// Parse backtick code section or raw backticks, return an inline.
// Assumes that the subject has a backtick at the current position.
-static struct inl* handle_backticks(subject *subj)
+static node_inl* handle_backticks(subject *subj)
{
chunk openticks = take_while(subj, isbacktick);
int startpos = subj->pos;
@@ -382,15 +382,15 @@ static int scan_delims(subject* subj, char c, bool * can_open, bool * can_close)
// Parse strong/emph or a fallback.
// Assumes the subject has '_' or '*' at the current position.
-static struct inl* handle_strong_emph(subject* subj, char c)
+static node_inl* handle_strong_emph(subject* subj, char c)
{
bool can_open, can_close;
- struct inl * result = NULL;
- struct inl ** last = malloc(sizeof(struct inl *));
- struct inl * new;
- struct inl * il;
- struct inl * first_head = NULL;
- struct inl * first_close = NULL;
+ node_inl * result = NULL;
+ node_inl ** last = malloc(sizeof(node_inl *));
+ node_inl * new;
+ node_inl * il;
+ node_inl * first_head = NULL;
+ node_inl * first_close = NULL;
int first_close_delims = 0;
int numdelims;
@@ -508,7 +508,7 @@ done:
}
// Parse backslash-escape or just a backslash, returning an inline.
-static struct inl* handle_backslash(subject *subj)
+static node_inl* handle_backslash(subject *subj)
{
advance(subj);
unsigned char nextchar = peek_char(subj);
@@ -525,10 +525,10 @@ static struct inl* handle_backslash(subject *subj)
// Parse an entity or a regular "&" string.
// Assumes the subject has an '&' character at the current position.
-static struct inl* handle_entity(subject* subj)
+static node_inl* handle_entity(subject* subj)
{
int match;
- struct inl *result;
+ node_inl *result;
match = scan_entity(&subj->input, subj->pos);
if (match) {
result = make_entity(chunk_dup(&subj->input, subj->pos, match));
@@ -542,10 +542,10 @@ static struct inl* handle_entity(subject* subj)
// Like make_str, but parses entities.
// Returns an inline sequence consisting of str and entity elements.
-static struct inl *make_str_with_entities(chunk *content)
+static node_inl *make_str_with_entities(chunk *content)
{
- struct inl *result = NULL;
- struct inl *new;
+ node_inl *result = NULL;
+ node_inl *new;
int searchpos;
char c;
subject subj;
@@ -634,7 +634,7 @@ static unsigned char *clean_title(chunk *title)
// Parse an autolink or HTML tag.
// Assumes the subject has a '<' character at the current position.
-static struct inl* handle_pointy_brace(subject* subj)
+static node_inl* handle_pointy_brace(subject* subj)
{
int matchlen = 0;
chunk contents;
@@ -693,7 +693,7 @@ static struct inl* handle_pointy_brace(subject* subj)
static int link_label(subject* subj, chunk *raw_label)
{
int nestlevel = 0;
- struct inl* tmp = NULL;
+ node_inl* tmp = NULL;
int startpos = subj->pos;
if (subj->label_nestlevel) {
@@ -751,10 +751,10 @@ static int link_label(subject* subj, chunk *raw_label)
}
// Parse a link or the link portion of an image, or return a fallback.
-static struct inl* handle_left_bracket(subject* subj)
+static node_inl* handle_left_bracket(subject* subj)
{
- struct inl *lab = NULL;
- struct inl *result = NULL;
+ node_inl *lab = NULL;
+ node_inl *result = NULL;
reference *ref;
int n;
int sps;
@@ -838,7 +838,7 @@ static struct inl* handle_left_bracket(subject* subj)
// Parse a hard or soft linebreak, returning an inline.
// Assumes the subject has a newline at the current position.
-static struct inl* handle_newline(subject *subj)
+static node_inl* handle_newline(subject *subj)
{
int nlpos = subj->pos;
// skip over newline
@@ -862,16 +862,16 @@ inline static int not_eof(subject* subj)
}
// Parse inlines while a predicate is satisfied. Return inlines.
-extern struct inl* parse_inlines_while(subject* subj, int (*f)(subject*))
+extern node_inl* parse_inlines_while(subject* subj, int (*f)(subject*))
{
- struct inl* result = NULL;
- struct inl** last = &result;
+ node_inl* result = NULL;
+ node_inl** last = &result;
while ((*f)(subj) && parse_inline(subj, last)) {
}
return result;
}
-struct inl *parse_chunk_inlines(chunk *chunk, reference** refmap)
+node_inl *parse_chunk_inlines(chunk *chunk, reference** refmap)
{
subject subj;
subject_from_chunk(&subj, chunk, refmap);
@@ -894,9 +894,9 @@ static int subject_find_special_char(subject *subj)
// Parse an inline, advancing subject, and add it to last element.
// Adjust tail to point to new last element of list.
// Return 0 if no inline can be parsed, 1 otherwise.
-static int parse_inline(subject* subj, struct inl ** last)
+static int parse_inline(subject* subj, node_inl ** last)
{
- struct inl* new = NULL;
+ node_inl* new = NULL;
chunk contents;
unsigned char c;
int endpos;
@@ -971,7 +971,7 @@ static int parse_inline(subject* subj, struct inl ** last)
return 1;
}
-extern struct inl* parse_inlines(strbuf *input, reference** refmap)
+extern node_inl* parse_inlines(strbuf *input, reference** refmap)
{
subject subj;
subject_from_buf(&subj, input, refmap);
diff --git a/src/print.c b/src/print.c
index 63f63c8..01e9136 100644
--- a/src/print.c
+++ b/src/print.c
@@ -117,7 +117,7 @@ extern void print_blocks(block* b, int indent)
}
// Prettyprint an inline list, for debugging.
-extern void print_inlines(struct inl* ils, int indent)
+extern void print_inlines(node_inl* ils, int indent)
{
while(ils != NULL) {
for (int i=0; i < indent; i++) {
diff --git a/src/stmd.h b/src/stmd.h
index 9ed33ec..dbc8c8c 100644
--- a/src/stmd.h
+++ b/src/stmd.h
@@ -10,7 +10,7 @@
#define VERSION "0.1"
#define CODE_INDENT 4
-struct inl {
+struct node_inl {
enum {
INL_STRING,
INL_SOFTBREAK,
@@ -25,22 +25,26 @@ struct inl {
} tag;
union {
chunk literal;
- struct inl *inlines;
+ struct node_inl *inlines;
struct {
- struct inl *label;
+ struct node_inl *label;
unsigned char *url;
unsigned char *title;
} linkable;
} content;
- struct inl *next;
+ struct node_inl *next;
};
-typedef struct Reference {
+typedef struct node_inl node_inl;
+
+struct reference {
unsigned char *label;
unsigned char *url;
unsigned char *title;
- UT_hash_handle hh; // used by uthash
-} reference;
+ UT_hash_handle hh; // used by uthash
+};
+
+typedef struct reference reference;
// Types for blocks
@@ -87,7 +91,7 @@ typedef struct Block {
struct Block* parent;
struct Block* top;
strbuf string_content;
- struct inl* inline_content;
+ node_inl* inline_content;
union {
struct ListData list_data;
struct FencedCodeData fenced_code_data;
@@ -98,8 +102,8 @@ typedef struct Block {
struct Block * prev;
} block;
-struct inl* parse_inlines(strbuf *input, reference** refmap);
-void free_inlines(struct inl* e);
+node_inl* parse_inlines(strbuf *input, reference** refmap);
+void free_inlines(node_inl* e);
int parse_reference(strbuf *input, reference** refmap);
void free_reference(reference *ref);
@@ -116,11 +120,11 @@ void free_blocks(block* e);
extern block *stmd_parse_document(const unsigned char *buffer, size_t len);
extern block *stmd_parse_file(FILE *f);
-void print_inlines(struct inl* ils, int indent);
+void print_inlines(node_inl* ils, int indent);
void print_blocks(block* blk, int indent);
void blocks_to_html(strbuf *html, block *b, bool tight);
-void inlines_to_html(strbuf *html, struct inl *b);
+void inlines_to_html(strbuf *html, node_inl *b);
void utf8proc_case_fold(strbuf *dest, const unsigned char *str, int len);
--
cgit v1.2.3
From add5dd1b9a9ba8c58cdc6ca0bb62d287acb56278 Mon Sep 17 00:00:00 2001
From: Vicent Marti
Date: Thu, 4 Sep 2014 19:40:27 +0200
Subject: Remove warnings
---
Makefile | 4 ++--
src/inlines.c | 17 +++++++++++++++--
src/scanners.re | 41 ++++++++++++++++++++++++-----------------
3 files changed, 41 insertions(+), 21 deletions(-)
(limited to 'src/inlines.c')
diff --git a/Makefile b/Makefile
index ed4ddd5..0d2eb8b 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
-CFLAGS=-g -O3 -Wall -Wextra -Wno-unused-variable -std=c99 -Isrc $(OPTFLAGS)
-LDFLAGS=-g -O3 -Wall -Wno-unused-variable # -Werror
+CFLAGS=-g -O3 -Wall -Wextra -std=c99 -Isrc $(OPTFLAGS)
+LDFLAGS=-g -O3 -Wall -Werror
SRCDIR=src
DATADIR=data
diff --git a/src/inlines.c b/src/inlines.c
index 6bb89da..5e0f3e5 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -108,13 +108,26 @@ extern void add_reference(reference** refmap, reference* ref)
}
}
+static unsigned char *bufdup(const unsigned char *buf)
+{
+ unsigned char *new = NULL;
+
+ if (!buf) {
+ int len = strlen((char *)buf);
+ new = malloc(len + 1);
+ memcpy(new, buf, len + 1);
+ }
+
+ return new;
+}
+
inline static node_inl* make_link_from_reference(node_inl* label, reference *ref)
{
node_inl* e = (node_inl*) malloc(sizeof(node_inl));
e->tag = INL_LINK;
e->content.linkable.label = label;
- e->content.linkable.url = strdup(ref->url);
- e->content.linkable.title = ref->title ? strdup(ref->title) : NULL;
+ e->content.linkable.url = bufdup(ref->url);
+ e->content.linkable.title = bufdup(ref->title);
e->next = NULL;
return e;
}
diff --git a/src/scanners.re b/src/scanners.re
index 71103f6..28aba9d 100644
--- a/src/scanners.re
+++ b/src/scanners.re
@@ -1,9 +1,5 @@
#include
-#define SCAN_DATA \
- const unsigned char *marker = NULL; \
- const unsigned char *start = p; \
-
/*!re2c
re2c:define:YYCTYPE = "unsigned char";
re2c:define:YYCURSOR = p;
@@ -61,7 +57,8 @@
// Try to match URI autolink after first <, returning number of chars matched.
extern int _scan_autolink_uri(const unsigned char *p)
{
- SCAN_DATA;
+ const unsigned char *marker = NULL;
+ const unsigned char *start = p;
/*!re2c
scheme [:]([^\x00-\x20<>\\]|escaped_char)*[>] { return (p - start); }
.? { return 0; }
@@ -71,7 +68,8 @@ extern int _scan_autolink_uri(const unsigned char *p)
// Try to match email autolink after first <, returning num of chars matched.
extern int _scan_autolink_email(const unsigned char *p)
{
- SCAN_DATA;
+ const unsigned char *marker = NULL;
+ const unsigned char *start = p;
/*!re2c
[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+
[@]
@@ -85,7 +83,8 @@ extern int _scan_autolink_email(const unsigned char *p)
// Try to match an HTML tag after first <, returning num of chars matched.
extern int _scan_html_tag(const unsigned char *p)
{
- SCAN_DATA;
+ const unsigned char *marker = NULL;
+ const unsigned char *start = p;
/*!re2c
htmltag { return (p - start); }
.? { return 0; }
@@ -96,7 +95,8 @@ extern int _scan_html_tag(const unsigned char *p)
// returning num of chars matched.
extern int _scan_html_block_tag(const unsigned char *p)
{
- SCAN_DATA;
+ const unsigned char *marker = NULL;
+ const unsigned char *start = p;
/*!re2c
[<] [/] blocktagname (spacechar | [>]) { return (p - start); }
[<] blocktagname (spacechar | [/>]) { return (p - start); }
@@ -111,7 +111,8 @@ extern int _scan_html_block_tag(const unsigned char *p)
// Newlines aren't ever allowed.
extern int _scan_link_url(const unsigned char *p)
{
- SCAN_DATA;
+ const unsigned char *marker = NULL;
+ const unsigned char *start = p;
/*!re2c
[ \n]* [<] ([^<>\n\\\x00] | escaped_char | [\\])* [>] { return (p - start); }
[ \n]* (reg_char+ | escaped_char | in_parens_nosp)* { return (p - start); }
@@ -124,7 +125,8 @@ extern int _scan_link_url(const unsigned char *p)
// level of internal nesting (quotes within quotes).
extern int _scan_link_title(const unsigned char *p)
{
- SCAN_DATA;
+ const unsigned char *marker = NULL;
+ const unsigned char *start = p;
/*!re2c
["] (escaped_char|[^"\x00])* ["] { return (p - start); }
['] (escaped_char|[^'\x00])* ['] { return (p - start); }
@@ -136,7 +138,7 @@ extern int _scan_link_title(const unsigned char *p)
// Match space characters, including newlines.
extern int _scan_spacechars(const unsigned char *p)
{
- SCAN_DATA;
+ const unsigned char *start = p; \
/*!re2c
[ \t\n]* { return (p - start); }
. { return 0; }
@@ -146,7 +148,8 @@ extern int _scan_spacechars(const unsigned char *p)
// Match ATX header start.
extern int _scan_atx_header_start(const unsigned char *p)
{
- SCAN_DATA;
+ const unsigned char *marker = NULL;
+ const unsigned char *start = p;
/*!re2c
[#]{1,6} ([ ]+|[\n]) { return (p - start); }
.? { return 0; }
@@ -157,7 +160,7 @@ extern int _scan_atx_header_start(const unsigned char *p)
// 2 for level-2, 0 for no match.
extern int _scan_setext_header_line(const unsigned char *p)
{
- SCAN_DATA;
+ const unsigned char *marker = NULL;
/*!re2c
[=]+ [ ]* [\n] { return 1; }
[-]+ [ ]* [\n] { return 2; }
@@ -170,7 +173,8 @@ extern int _scan_setext_header_line(const unsigned char *p)
// spaces between the hyphens or asterisks."
extern int _scan_hrule(const unsigned char *p)
{
- SCAN_DATA;
+ const unsigned char *marker = NULL;
+ const unsigned char *start = p;
/*!re2c
([*][ ]*){3,} [ \t]* [\n] { return (p - start); }
([_][ ]*){3,} [ \t]* [\n] { return (p - start); }
@@ -182,7 +186,8 @@ extern int _scan_hrule(const unsigned char *p)
// Scan an opening code fence.
extern int _scan_open_code_fence(const unsigned char *p)
{
- SCAN_DATA;
+ const unsigned char *marker = NULL;
+ const unsigned char *start = p;
/*!re2c
[`]{3,} / [^`\n\x00]*[\n] { return (p - start); }
[~]{3,} / [^~\n\x00]*[\n] { return (p - start); }
@@ -193,7 +198,8 @@ extern int _scan_open_code_fence(const unsigned char *p)
// Scan a closing code fence with length at least len.
extern int _scan_close_code_fence(const unsigned char *p)
{
- SCAN_DATA;
+ const unsigned char *marker = NULL;
+ const unsigned char *start = p;
/*!re2c
([`]{3,} | [~]{3,}) / spacechar* [\n] { return (p - start); }
.? { return 0; }
@@ -204,7 +210,8 @@ extern int _scan_close_code_fence(const unsigned char *p)
// Returns number of chars matched.
extern int _scan_entity(const unsigned char *p)
{
- SCAN_DATA;
+ const unsigned char *marker = NULL;
+ const unsigned char *start = p;
/*!re2c
[&] ([#] ([Xx][A-Fa-f0-9]{1,8}|[0-9]{1,8}) |[A-Za-z][A-Za-z0-9]{1,31} ) [;]
{ return (p - start); }
--
cgit v1.2.3
From 278b89d092cae8fe9cdd6346c69512886d36abbd Mon Sep 17 00:00:00 2001
From: Vicent Marti
Date: Thu, 4 Sep 2014 20:04:21 +0200
Subject: Silly me
---
src/inlines.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
(limited to 'src/inlines.c')
diff --git a/src/inlines.c b/src/inlines.c
index 5e0f3e5..6b17027 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -112,7 +112,7 @@ static unsigned char *bufdup(const unsigned char *buf)
{
unsigned char *new = NULL;
- if (!buf) {
+ if (buf) {
int len = strlen((char *)buf);
new = malloc(len + 1);
memcpy(new, buf, len + 1);
--
cgit v1.2.3
From 61e3e606e64221eaa5cf3d83dc598d5a42818d10 Mon Sep 17 00:00:00 2001
From: Vicent Marti
Date: Sat, 6 Sep 2014 20:48:05 +0200
Subject: UTF8-aware detabbing and entity handling
---
Makefile | 13 ++++++-----
src/blocks.c | 35 ++++++------------------------
src/html/houdini.h | 2 ++
src/html/html.c | 1 -
src/inlines.c | 63 ++++++++++++++++++++++--------------------------------
src/print.c | 5 -----
src/stmd.h | 3 ---
src/utf8.c | 59 ++++++++++++++++++++++++++++++++++++++++++++------
8 files changed, 95 insertions(+), 86 deletions(-)
(limited to 'src/inlines.c')
diff --git a/Makefile b/Makefile
index 0d2eb8b..b5e487d 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
-CFLAGS=-g -O3 -Wall -Wextra -std=c99 -Isrc $(OPTFLAGS)
-LDFLAGS=-g -O3 -Wall -Werror
+CFLAGS=-g -pg -O3 -Wall -Wextra -std=c99 -Isrc $(OPTFLAGS)
+LDFLAGS=-g -pg -O3 -Wall -Werror
SRCDIR=src
DATADIR=data
@@ -41,11 +41,11 @@ testjs: spec.txt
benchjs:
node js/bench.js ${BENCHINP}
-HTML_OBJ=$(SRCDIR)/html/html.o $(SRCDIR)/html/houdini_href_e.o $(SRCDIR)/html/houdini_html_e.o
+HTML_OBJ=$(SRCDIR)/html/html.o $(SRCDIR)/html/houdini_href_e.o $(SRCDIR)/html/houdini_html_e.o $(SRCDIR)/html/houdini_html_u.o
STMD_OBJ=$(SRCDIR)/inlines.o $(SRCDIR)/buffer.o $(SRCDIR)/blocks.o $(SRCDIR)/scanners.c $(SRCDIR)/print.o $(SRCDIR)/utf8.o
-$(PROG): $(SRCDIR)/main.c $(HTML_OBJ) $(STMD_OBJ)
- $(CC) $(LDFLAGS) -o $@ $^
+$(PROG): $(SRCDIR)/html/html_unescape.h $(SRCDIR)/case_fold_switch.inc $(HTML_OBJ) $(STMD_OBJ) $(SRCDIR)/main.c
+ $(CC) $(LDFLAGS) -o $@ $(HTML_OBJ) $(STMD_OBJ) $(SRCDIR)/main.c
$(SRCDIR)/scanners.c: $(SRCDIR)/scanners.re
re2c --case-insensitive -bis $< > $@ || (rm $@ && false)
@@ -53,6 +53,9 @@ $(SRCDIR)/scanners.c: $(SRCDIR)/scanners.re
$(SRCDIR)/case_fold_switch.inc: $(DATADIR)/CaseFolding-3.2.0.txt
perl mkcasefold.pl < $< > $@
+$(SRCDIR)/html/html_unescape.h: $(SRCDIR)/html/html_unescape.gperf
+ gperf -I -t -N find_entity -H hash_entity -K entity -C -l --null-strings -m5 $< > $@
+
.PHONY: leakcheck clean fuzztest dingus upload
dingus:
diff --git a/src/blocks.c b/src/blocks.c
index f671b5e..8c7d49c 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -5,6 +5,8 @@
#include
#include "stmd.h"
+#include "utf8.h"
+#include "html/houdini.h"
#include "scanners.h"
#include "uthash.h"
@@ -184,7 +186,7 @@ static void finalize(node_block* b, int line_number)
firstlinelen = strbuf_strchr(&b->string_content, '\n', 0);
strbuf_init(&b->attributes.fenced_code_data.info, 0);
- strbuf_set(
+ houdini_unescape_html_f(
&b->attributes.fenced_code_data.info,
b->string_content.ptr,
firstlinelen
@@ -369,31 +371,6 @@ static int lists_match(struct ListData list_data,
list_data.bullet_char == item_data.bullet_char);
}
-static void expand_tabs(strbuf *ob, const unsigned char *line, size_t size)
-{
- size_t i = 0, tab = 0;
-
- while (i < size) {
- size_t org = i;
-
- while (i < size && line[i] != '\t') {
- i++; tab++;
- }
-
- if (i > org)
- strbuf_put(ob, line + org, i - org);
-
- if (i >= size)
- break;
-
- do {
- strbuf_putc(ob, ' '); tab++;
- } while (tab % 4);
-
- i++;
- }
-}
-
static node_block *finalize_document(node_block *document, int linenum)
{
while (document != document->top) {
@@ -415,7 +392,7 @@ extern node_block *stmd_parse_file(FILE *f)
node_block *document = make_document();
while (fgets((char *)buffer, sizeof(buffer), f)) {
- expand_tabs(&line, buffer, strlen((char *)buffer));
+ utf8proc_detab(&line, buffer, strlen((char *)buffer));
incorporate_line(&line, linenum, &document);
strbuf_clear(&line);
linenum++;
@@ -436,10 +413,10 @@ extern node_block *stmd_parse_document(const unsigned char *buffer, size_t len)
const unsigned char *eol = memchr(buffer, '\n', end - buffer);
if (!eol) {
- expand_tabs(&line, buffer, end - buffer);
+ utf8proc_detab(&line, buffer, end - buffer);
buffer = end;
} else {
- expand_tabs(&line, buffer, (eol - buffer) + 1);
+ utf8proc_detab(&line, buffer, (eol - buffer) + 1);
buffer += (eol - buffer) + 1;
}
diff --git a/src/html/houdini.h b/src/html/houdini.h
index 1e54d20..5fd690d 100644
--- a/src/html/houdini.h
+++ b/src/html/houdini.h
@@ -25,9 +25,11 @@ extern "C" {
#define HOUDINI_ESCAPED_SIZE(x) (((x) * 12) / 10)
#define HOUDINI_UNESCAPED_SIZE(x) (x)
+extern size_t houdini_unescape_ent(strbuf *ob, const uint8_t *src, size_t size);
extern int houdini_escape_html(strbuf *ob, const uint8_t *src, size_t size);
extern int houdini_escape_html0(strbuf *ob, const uint8_t *src, size_t size, int secure);
extern int houdini_unescape_html(strbuf *ob, const uint8_t *src, size_t size);
+extern void houdini_unescape_html_f(strbuf *ob, const uint8_t *src, size_t size);
extern int houdini_escape_xml(strbuf *ob, const uint8_t *src, size_t size);
extern int houdini_escape_uri(strbuf *ob, const uint8_t *src, size_t size);
extern int houdini_escape_url(strbuf *ob, const uint8_t *src, size_t size);
diff --git a/src/html/html.c b/src/html/html.c
index 758ec80..595dfcd 100644
--- a/src/html/html.c
+++ b/src/html/html.c
@@ -166,7 +166,6 @@ void inlines_to_html(strbuf *html, node_inl* ils)
break;
case INL_RAW_HTML:
- case INL_ENTITY:
strbuf_put(html,
ils->content.literal.data,
ils->content.literal.len);
diff --git a/src/inlines.c b/src/inlines.c
index 6b17027..7b27150 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -5,6 +5,8 @@
#include
#include "stmd.h"
+#include "html/houdini.h"
+#include "utf8.h"
#include "uthash.h"
#include "scanners.h"
@@ -176,7 +178,6 @@ inline static node_inl* make_simple(int t)
#define make_str(s) make_literal(INL_STRING, s)
#define make_code(s) make_literal(INL_CODE, s)
#define make_raw_html(s) make_literal(INL_RAW_HTML, s)
-#define make_entity(s) make_literal(INL_ENTITY, s)
#define make_linebreak() make_simple(INL_LINEBREAK)
#define make_softbreak() make_simple(INL_SOFTBREAK)
#define make_emph(contents) make_inlines(INL_EMPH, contents)
@@ -191,7 +192,6 @@ extern void free_inlines(node_inl* e)
case INL_STRING:
case INL_RAW_HTML:
case INL_CODE:
- case INL_ENTITY:
chunk_free(&e->content.literal);
break;
case INL_LINEBREAK:
@@ -540,45 +540,34 @@ static node_inl* handle_backslash(subject *subj)
// Assumes the subject has an '&' character at the current position.
static node_inl* handle_entity(subject* subj)
{
- int match;
- node_inl *result;
- match = scan_entity(&subj->input, subj->pos);
- if (match) {
- result = make_entity(chunk_dup(&subj->input, subj->pos, match));
- subj->pos += match;
- } else {
- advance(subj);
- result = make_str(chunk_literal("&"));
- }
- return result;
+ strbuf ent = GH_BUF_INIT;
+ size_t len;
+
+ advance(subj);
+
+ len = houdini_unescape_ent(&ent,
+ subj->input.data + subj->pos,
+ subj->input.len - subj->pos
+ );
+
+ if (len == 0)
+ return make_str(chunk_literal("&"));
+
+ subj->pos += len;
+ return make_str(chunk_buf_detach(&ent));
}
// Like make_str, but parses entities.
// Returns an inline sequence consisting of str and entity elements.
static node_inl *make_str_with_entities(chunk *content)
{
- node_inl *result = NULL;
- node_inl *new;
- int searchpos;
- char c;
- subject subj;
-
- subject_from_chunk(&subj, content, NULL);
+ strbuf unescaped = GH_BUF_INIT;
- while ((c = peek_char(&subj))) {
- switch (c) {
- case '&':
- new = handle_entity(&subj);
- break;
- default:
- searchpos = chunk_strchr(&subj.input, '&', subj.pos);
- new = make_str(chunk_dup(&subj.input, subj.pos, searchpos - subj.pos));
- subj.pos = searchpos;
- }
- result = append_inlines(result, new);
+ if (houdini_unescape_html(&unescaped, content->data, (size_t)content->len)) {
+ return make_str(chunk_buf_detach(&unescaped));
+ } else {
+ return make_str(*content);
}
-
- return result;
}
// Destructively unescape a string: remove backslashes before punctuation chars.
@@ -611,9 +600,9 @@ static unsigned char *clean_url(chunk *url, int is_email)
strbuf_puts(&buf, "mailto:");
if (url->data[0] == '<' && url->data[url->len - 1] == '>') {
- strbuf_put(&buf, url->data + 1, url->len - 2);
+ houdini_unescape_html_f(&buf, url->data + 1, url->len - 2);
} else {
- strbuf_put(&buf, url->data, url->len);
+ houdini_unescape_html_f(&buf, url->data, url->len);
}
unescape_buffer(&buf);
@@ -636,9 +625,9 @@ static unsigned char *clean_title(chunk *title)
if ((first == '\'' && last == '\'') ||
(first == '(' && last == ')') ||
(first == '"' && last == '"')) {
- strbuf_set(&buf, title->data + 1, title->len - 2);
+ houdini_unescape_html_f(&buf, title->data + 1, title->len - 2);
} else {
- strbuf_set(&buf, title->data, title->len);
+ houdini_unescape_html_f(&buf, title->data, title->len);
}
unescape_buffer(&buf);
diff --git a/src/print.c b/src/print.c
index 0ff86fa..9240dac 100644
--- a/src/print.c
+++ b/src/print.c
@@ -145,11 +145,6 @@ extern void print_inlines(node_inl* ils, int indent)
print_str(ils->content.literal.data, ils->content.literal.len);
putchar('\n');
break;
- case INL_ENTITY:
- printf("entity ");
- print_str(ils->content.literal.data, ils->content.literal.len);
- putchar('\n');
- break;
case INL_LINK:
case INL_IMAGE:
printf("%s url=", ils->tag == INL_LINK ? "link" : "image");
diff --git a/src/stmd.h b/src/stmd.h
index be65371..c80eeda 100644
--- a/src/stmd.h
+++ b/src/stmd.h
@@ -17,7 +17,6 @@ struct node_inl {
INL_LINEBREAK,
INL_CODE,
INL_RAW_HTML,
- INL_ENTITY,
INL_EMPH,
INL_STRONG,
INL_LINK,
@@ -133,6 +132,4 @@ void print_blocks(node_block* blk, int indent);
void blocks_to_html(strbuf *html, node_block *b, bool tight);
void inlines_to_html(strbuf *html, node_inl *b);
-void utf8proc_case_fold(strbuf *dest, const unsigned char *str, int len);
-
#endif
diff --git a/src/utf8.c b/src/utf8.c
index cebd872..12d7ba5 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -3,7 +3,7 @@
#include
#include
-#include "stmd.h"
+#include "utf8.h"
static const int8_t utf8proc_utf8class[256] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -23,6 +23,12 @@ static const int8_t utf8proc_utf8class[256] = {
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0 };
+static void encode_unknown(strbuf *buf)
+{
+ static const unsigned char repl[] = {239, 191, 189};
+ strbuf_put(buf, repl, 3);
+}
+
ssize_t utf8proc_charlen(const uint8_t *str, ssize_t str_len)
{
ssize_t length, i;
@@ -46,6 +52,46 @@ ssize_t utf8proc_charlen(const uint8_t *str, ssize_t str_len)
return length;
}
+void utf8proc_detab(strbuf *ob, const unsigned char *line, size_t size)
+{
+ static const unsigned char whitespace[] = " ";
+
+ size_t i = 0, tab = 0;
+
+ while (i < size) {
+ size_t org = i;
+
+ while (i < size && line[i] != '\t' && line[i] <= 0x80) {
+ i++; tab++;
+ }
+
+ if (i > org)
+ strbuf_put(ob, line + org, i - org);
+
+ if (i >= size)
+ break;
+
+ if (line[i] == '\t') {
+ int numspaces = 4 - (tab % 4);
+ strbuf_put(ob, whitespace, numspaces);
+ i += 1;
+ tab += numspaces;
+ } else {
+ ssize_t charlen = utf8proc_charlen(line + i, size - i);
+
+ if (charlen < 0) {
+ encode_unknown(ob);
+ i++;
+ } else {
+ strbuf_put(ob, line + i, charlen);
+ i += charlen;
+ }
+
+ tab += 1;
+ }
+ }
+}
+
ssize_t utf8proc_iterate(const uint8_t *str, ssize_t str_len, int32_t *dst)
{
ssize_t length;
@@ -89,9 +135,9 @@ void utf8proc_encode_char(int32_t uc, strbuf *buf)
unsigned char dst[4];
int len = 0;
- if (uc < 0x00) {
- assert(false);
- } else if (uc < 0x80) {
+ assert(uc >= 0);
+
+ if (uc < 0x80) {
dst[0] = uc;
len = 1;
} else if (uc < 0x800) {
@@ -116,7 +162,8 @@ void utf8proc_encode_char(int32_t uc, strbuf *buf)
dst[3] = 0x80 + (uc & 0x3F);
len = 4;
} else {
- assert(false);
+ encode_unknown(buf);
+ return;
}
strbuf_put(buf, dst, len);
@@ -133,7 +180,7 @@ void utf8proc_case_fold(strbuf *dest, const unsigned char *str, int len)
ssize_t char_len = utf8proc_iterate(str, len, &c);
if (char_len < 0) {
- bufpush(0xFFFD);
+ encode_unknown(dest);
continue;
}
--
cgit v1.2.3
From 798f58a2b614280201141b398c8e498cecc8ab5e Mon Sep 17 00:00:00 2001
From: Vicent Marti
Date: Sat, 6 Sep 2014 21:17:23 +0200
Subject: This is going well
---
spec.txt | 35 +++++++++++++++++++-----------
src/inlines.c | 68 +++++++++++++++++++++++++++++++++++------------------------
2 files changed, 64 insertions(+), 39 deletions(-)
(limited to 'src/inlines.c')
diff --git a/spec.txt b/spec.txt
index 616cb96..ebd6d98 100644
--- a/spec.txt
+++ b/spec.txt
@@ -3688,7 +3688,7 @@ raw HTML:
.
.
-http://google.com?find=\*
+http://google.com?find=\*
.
.
@@ -3727,25 +3727,37 @@ foo
## Entities
-Entities are parsed as entities, not as literal text, in all contexts
-except code spans and code blocks. Three kinds of entities are recognized.
+With the goal of making this standard as HTML-agnostic as possible, all valid HTML entities in any
+context are recognized as such and converted into their actual values (i.e. the UTF8 characters representing
+the entity itself) before they are stored in the AST.
+
+This allows implementations that target HTML output to trivially escape the entities when generating HTML,
+and simplifies the job of implementations targeting other languages, as these will only need to handle the
+UTF8 chars and need not be HTML-entity aware.
[Named entities](#name-entities) consist of `&`
-+ a string of 2-32 alphanumerics beginning with a letter + `;`.
++ any of the valid HTML5 entity names + `;`. The [following document](http://www.whatwg.org/specs/web-apps/current-work/multipage/entities.json)
+is used as an authoritative source of the valid entity names and their corresponding codepoints.
+
+Conforming implementations that target HTML don't need to generate entities for all the valid
+named entities that exist, with the exception of `"` (`&quot;`), `&` (`&amp;`), `<` (`&lt;`) and `>` (`&gt;`),
+which always need to be written as entities for security reasons.
.
& © Æ Ď ¾ ℋ ⅆ ∲
.
- & © Æ Ď ¾ ℋ ⅆ ∲
+ & © Æ Ď ¾ ℋ ⅆ ∲
.
[Decimal entities](#decimal-entities)
-consist of `&#` + a string of 1--8 arabic digits + `;`.
+consist of `&#` + a string of 1--8 arabic digits + `;`. Again, these entities need to be recognised
+and transformed into their corresponding UTF8 codepoints. Invalid Unicode codepoints will be written
+as the "unknown codepoint" character (`0xFFFD`).
.
- # Ӓ Ϡ
+# Ӓ Ϡ
.
- # Ӓ Ϡ
+# Ӓ Ϡ �
.
[Hexadecimal entities](#hexadecimal-entities)
@@ -3767,7 +3779,7 @@ Here are some nonentities:
.
Although HTML5 does accept some entities without a trailing semicolon
-(such as `&copy`), these are not recognized as entities here:
+(such as `&copy`), these are not recognized as entities here, because it makes the grammar too ambiguous:
.
©
@@ -3775,13 +3787,12 @@ Although HTML5 does accept some entities without a trailing semicolon
©
.
-On the other hand, many strings that are not on the list of HTML5
-named entities are recognized as entities here:
+Strings that are not on the list of HTML5 named entities are not recognized as entities either:
.
&MadeUpEntity;
.
-&MadeUpEntity;
+&MadeUpEntity;
.
Entities are recognized in any context besides code spans or
diff --git a/src/inlines.c b/src/inlines.c
index 7b27150..aa0e13e 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -20,8 +20,9 @@ typedef struct Subject {
reference* lookup_reference(reference** refmap, chunk *label);
reference* make_reference(chunk *label, chunk *url, chunk *title);
-static unsigned char *clean_url(chunk *url, int is_email);
+static unsigned char *clean_url(chunk *url);
static unsigned char *clean_title(chunk *title);
+static unsigned char *clean_autolink(chunk *url, int is_email);
inline static void chunk_free(chunk *c);
inline static void chunk_trim(chunk *c);
@@ -91,7 +92,7 @@ extern reference* make_reference(chunk *label, chunk *url, chunk *title)
reference *ref;
ref = malloc(sizeof(reference));
ref->label = normalize_reference(label);
- ref->url = clean_url(url, 0);
+ ref->url = clean_url(url);
ref->title = clean_title(title);
return ref;
}
@@ -123,27 +124,31 @@ static unsigned char *bufdup(const unsigned char *buf)
return new;
}
-inline static node_inl* make_link_from_reference(node_inl* label, reference *ref)
+static inline node_inl *make_link_(node_inl *label, unsigned char *url, unsigned char *title)
{
node_inl* e = (node_inl*) malloc(sizeof(node_inl));
e->tag = INL_LINK;
e->content.linkable.label = label;
- e->content.linkable.url = bufdup(ref->url);
- e->content.linkable.title = bufdup(ref->title);
+ e->content.linkable.url = url;
+ e->content.linkable.title = title;
e->next = NULL;
return e;
}
+inline static node_inl* make_ref_link(node_inl* label, reference *ref)
+{
+ return make_link_(label, bufdup(ref->url), bufdup(ref->title));
+}
+
+inline static node_inl* make_autolink(node_inl* label, chunk url, int is_email)
+{
+ return make_link_(label, clean_autolink(&url, is_email), NULL);
+}
+
// Create an inline with a linkable string value.
-inline static node_inl* make_link(node_inl* label, chunk url, chunk title, int is_email)
+inline static node_inl* make_link(node_inl* label, chunk url, chunk title)
{
- node_inl* e = (node_inl*) malloc(sizeof(node_inl));
- e->tag = INL_LINK;
- e->content.linkable.label = label;
- e->content.linkable.url = clean_url(&url, is_email);
- e->content.linkable.title = clean_title(&title);
- e->next = NULL;
- return e;
+ return make_link_(label, clean_url(&url), clean_title(&title));
}
inline static node_inl* make_inlines(int t, node_inl* contents)
@@ -587,7 +592,7 @@ extern void unescape_buffer(strbuf *buf)
// Clean a URL: remove surrounding whitespace and surrounding <>,
// and remove \ that escape punctuation.
-static unsigned char *clean_url(chunk *url, int is_email)
+static unsigned char *clean_url(chunk *url)
{
strbuf buf = GH_BUF_INIT;
@@ -596,9 +601,6 @@ static unsigned char *clean_url(chunk *url, int is_email)
if (url->len == 0)
return NULL;
- if (is_email)
- strbuf_puts(&buf, "mailto:");
-
if (url->data[0] == '<' && url->data[url->len - 1] == '>') {
houdini_unescape_html_f(&buf, url->data + 1, url->len - 2);
} else {
@@ -609,6 +611,22 @@ static unsigned char *clean_url(chunk *url, int is_email)
return strbuf_detach(&buf);
}
+static unsigned char *clean_autolink(chunk *url, int is_email)
+{
+ strbuf buf = GH_BUF_INIT;
+
+ chunk_trim(url);
+
+ if (url->len == 0)
+ return NULL;
+
+ if (is_email)
+ strbuf_puts(&buf, "mailto:");
+
+ houdini_unescape_html_f(&buf, url->data, url->len);
+ return strbuf_detach(&buf);
+}
+
// Clean a title: remove surrounding quotes and remove \ that escape punctuation.
static unsigned char *clean_title(chunk *title)
{
@@ -649,11 +667,9 @@ static node_inl* handle_pointy_brace(subject* subj)
contents = chunk_dup(&subj->input, subj->pos, matchlen - 1);
subj->pos += matchlen;
- return make_link(
+ return make_autolink(
make_str_with_entities(&contents),
- contents,
- chunk_literal(""),
- 0
+ contents, 0
);
}
@@ -663,11 +679,9 @@ static node_inl* handle_pointy_brace(subject* subj)
contents = chunk_dup(&subj->input, subj->pos, matchlen - 1);
subj->pos += matchlen;
- return make_link(
+ return make_autolink(
make_str_with_entities(&contents),
- contents,
- chunk_literal(""),
- 1
+ contents, 1
);
}
@@ -792,7 +806,7 @@ static node_inl* handle_left_bracket(subject* subj)
title = chunk_dup(&subj->input, starttitle, endtitle - starttitle);
lab = parse_chunk_inlines(&rawlabel, NULL);
- return make_link(lab, url, title, 0);
+ return make_link(lab, url, title);
} else {
// if we get here, we matched a label but didn't get further:
subj->pos = endlabel;
@@ -823,7 +837,7 @@ static node_inl* handle_left_bracket(subject* subj)
ref = lookup_reference(subj->reference_map, &reflabel);
if (ref != NULL) { // found
lab = parse_chunk_inlines(&rawlabel, NULL);
- result = make_link_from_reference(lab, ref);
+ result = make_ref_link(lab, ref);
} else {
subj->pos = endlabel;
lab = parse_chunk_inlines(&rawlabel, subj->reference_map);
--
cgit v1.2.3
From 94a79a605f3e76a43f1f87a5044f6761b99e5ca5 Mon Sep 17 00:00:00 2001
From: Vicent Marti
Date: Wed, 10 Sep 2014 18:33:27 +0200
Subject: Cleanup reference implementation
---
Makefile | 2 +-
src/blocks.c | 16 ++---
src/buffer.c | 43 ++++++++++++++
src/buffer.h | 2 +
src/inlines.c | 176 +++++++------------------------------------------------
src/references.c | 109 ++++++++++++++++++++++++++++++++++
src/references.h | 27 +++++++++
src/stmd.h | 26 +++-----
src/utf8.c | 10 ++--
src/utf8.h | 5 +-
10 files changed, 225 insertions(+), 191 deletions(-)
create mode 100644 src/references.c
create mode 100644 src/references.h
(limited to 'src/inlines.c')
diff --git a/Makefile b/Makefile
index 5d13272..11e2141 100644
--- a/Makefile
+++ b/Makefile
@@ -42,7 +42,7 @@ benchjs:
node js/bench.js ${BENCHINP}
HTML_OBJ=$(SRCDIR)/html/html.o $(SRCDIR)/html/houdini_href_e.o $(SRCDIR)/html/houdini_html_e.o $(SRCDIR)/html/houdini_html_u.o
-STMD_OBJ=$(SRCDIR)/inlines.o $(SRCDIR)/buffer.o $(SRCDIR)/blocks.o $(SRCDIR)/scanners.c $(SRCDIR)/print.o $(SRCDIR)/utf8.o
+STMD_OBJ=$(SRCDIR)/inlines.o $(SRCDIR)/buffer.o $(SRCDIR)/blocks.o $(SRCDIR)/scanners.c $(SRCDIR)/print.o $(SRCDIR)/utf8.o $(SRCDIR)/references.c
$(PROG): $(SRCDIR)/html/html_unescape.h $(SRCDIR)/case_fold_switch.inc $(HTML_OBJ) $(STMD_OBJ) $(SRCDIR)/main.c
$(CC) $(LDFLAGS) -o $@ $(HTML_OBJ) $(STMD_OBJ) $(SRCDIR)/main.c
diff --git a/src/blocks.c b/src/blocks.c
index 72b2dc2..30a8284 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -8,7 +8,6 @@
#include "utf8.h"
#include "html/houdini.h"
#include "scanners.h"
-#include "uthash.h"
#define peek_at(i, n) (i)->data[n]
@@ -36,12 +35,7 @@ static node_block* make_block(int tag, int start_line, int start_column)
extern node_block* make_document()
{
node_block *e = make_block(BLOCK_DOCUMENT, 1, 1);
- reference *map = NULL;
- reference ** refmap;
-
- refmap = (reference**) malloc(sizeof(reference*));
- *refmap = map;
- e->as.document.refmap = refmap;
+ e->as.document.refmap = reference_map_new();
e->top = e;
return e;
@@ -164,7 +158,7 @@ static void finalize(node_block* b, int line_number)
case BLOCK_PARAGRAPH:
pos = 0;
while (strbuf_at(&b->string_content, 0) == '[' &&
- (pos = parse_reference(&b->string_content, b->top->as.document.refmap))) {
+ (pos = parse_reference_inline(&b->string_content, b->top->as.document.refmap))) {
strbuf_drop(&b->string_content, pos);
}
@@ -192,7 +186,7 @@ static void finalize(node_block* b, int line_number)
strbuf_drop(&b->string_content, firstlinelen + 1);
strbuf_trim(&b->as.code.info);
- unescape_buffer(&b->as.code.info);
+ strbuf_unescape(&b->as.code.info);
break;
case BLOCK_LIST: // determine tight/loose status
@@ -268,7 +262,7 @@ extern void free_blocks(node_block* e)
if (e->tag == BLOCK_FENCED_CODE) {
strbuf_free(&e->as.code.info);
} else if (e->tag == BLOCK_DOCUMENT) {
- free_reference_map(e->as.document.refmap);
+ reference_map_free(e->as.document.refmap);
}
free_blocks(e->children);
free(e);
@@ -278,7 +272,7 @@ extern void free_blocks(node_block* e)
// Walk through node_block and all children, recursively, parsing
// string content into inline content where appropriate.
-void process_inlines(node_block* cur, reference** refmap)
+void process_inlines(node_block* cur, reference_map *refmap)
{
switch (cur->tag) {
case BLOCK_PARAGRAPH:
diff --git a/src/buffer.c b/src/buffer.c
index 90c2186..cdf8ca0 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -308,3 +308,46 @@ void strbuf_trim(strbuf *buf)
buf->ptr[buf->size] = '\0';
}
+
+// Destructively modify string, collapsing consecutive
+// space and newline characters into a single space.
+//
+// Only ' ' and '\n' count as whitespace here; any other byte (tabs
+// included) is copied through unchanged and ends the current run.
+// The buffer is rewritten in place and then truncated to the new length.
+void strbuf_normalize_whitespace(strbuf *s)
+{
+ bool last_char_was_space = false;
+ int r, w;
+
+ // r is the read cursor, w the write cursor; w never overtakes r.
+ for (r = 0, w = 0; r < s->size; ++r) {
+ switch (s->ptr[r]) {
+ case ' ':
+ case '\n':
+ // Already emitted a space for this run: drop the byte.
+ if (last_char_was_space)
+ break;
+
+ s->ptr[w++] = ' ';
+ last_char_was_space = true;
+ break;
+
+ default:
+ s->ptr[w++] = s->ptr[r];
+ last_char_was_space = false;
+ }
+ }
+
+ strbuf_truncate(s, w);
+}
+
+// Destructively unescape a string: remove each backslash that precedes an
+// ASCII punctuation character, keeping the escaped character itself.
+//
+// The escaped character is copied verbatim and is never re-examined as the
+// start of another escape, so an escaped backslash survives:
+// `\\*` becomes `\*` (not `*`), and `\*` becomes `*`.
+// A backslash followed by a non-punctuation byte, or a trailing backslash,
+// is kept as-is. The buffer is rewritten in place and truncated.
+extern void strbuf_unescape(strbuf *buf)
+{
+    int r, w;
+
+    for (r = 0, w = 0; r < buf->size; ++r) {
+        // Step over the backslash; the bounds check keeps the look-ahead
+        // inside the buffer even if it is not NUL-terminated.
+        if (buf->ptr[r] == '\\' &&
+            r + 1 < buf->size &&
+            ispunct(buf->ptr[r + 1]))
+            r++;
+
+        buf->ptr[w++] = buf->ptr[r];
+    }
+
+    strbuf_truncate(buf, w);
+}
+
diff --git a/src/buffer.h b/src/buffer.h
index 6f45cbb..1bc1eee 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -108,5 +108,7 @@ int strbuf_strrchr(const strbuf *buf, int c, int pos);
void strbuf_drop(strbuf *buf, int n);
void strbuf_truncate(strbuf *buf, int len);
void strbuf_trim(strbuf *buf);
+void strbuf_normalize_whitespace(strbuf *s);
+void strbuf_unescape(strbuf *s);
#endif
diff --git a/src/inlines.c b/src/inlines.c
index aa0e13e..3040f09 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -7,110 +7,23 @@
#include "stmd.h"
#include "html/houdini.h"
#include "utf8.h"
-#include "uthash.h"
#include "scanners.h"
typedef struct Subject {
chunk input;
int pos;
- int label_nestlevel;
- reference** reference_map;
+ int label_nestlevel;
+ reference_map *refmap;
} subject;
-reference* lookup_reference(reference** refmap, chunk *label);
-reference* make_reference(chunk *label, chunk *url, chunk *title);
-
-static unsigned char *clean_url(chunk *url);
-static unsigned char *clean_title(chunk *title);
-static unsigned char *clean_autolink(chunk *url, int is_email);
-
-inline static void chunk_free(chunk *c);
-inline static void chunk_trim(chunk *c);
-
-inline static chunk chunk_literal(const char *data);
-inline static chunk chunk_buf_detach(strbuf *buf);
-inline static chunk chunk_dup(const chunk *ch, int pos, int len);
-
-static node_inl *parse_chunk_inlines(chunk *chunk, reference** refmap);
+static node_inl *parse_chunk_inlines(chunk *chunk, reference_map *refmap);
static node_inl *parse_inlines_while(subject* subj, int (*f)(subject*));
static int parse_inline(subject* subj, node_inl ** last);
-static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap);
-static void subject_from_buf(subject *e, strbuf *buffer, reference** refmap);
+static void subject_from_chunk(subject *e, chunk *chunk, reference_map *refmap);
+static void subject_from_buf(subject *e, strbuf *buffer, reference_map *refmap);
static int subject_find_special_char(subject *subj);
-static void normalize_whitespace(strbuf *s);
-
-extern void free_reference(reference *ref) {
- free(ref->label);
- free(ref->url);
- free(ref->title);
- free(ref);
-}
-
-extern void free_reference_map(reference **refmap) {
- /* free the hash table contents */
- reference *s;
- reference *tmp;
- if (refmap != NULL) {
- HASH_ITER(hh, *refmap, s, tmp) {
- HASH_DEL(*refmap, s);
- free_reference(s);
- }
- free(refmap);
- }
-}
-
-// normalize reference: collapse internal whitespace to single space,
-// remove leading/trailing whitespace, case fold
-static unsigned char *normalize_reference(chunk *ref)
-{
- strbuf normalized = GH_BUF_INIT;
-
- utf8proc_case_fold(&normalized, ref->data, ref->len);
- strbuf_trim(&normalized);
- normalize_whitespace(&normalized);
-
- return strbuf_detach(&normalized);
-}
-
-// Returns reference if refmap contains a reference with matching
-// label, otherwise NULL.
-extern reference* lookup_reference(reference** refmap, chunk *label)
-{
- reference *ref = NULL;
- unsigned char *norm = normalize_reference(label);
- if (refmap != NULL) {
- HASH_FIND_STR(*refmap, (char*)norm, ref);
- }
- free(norm);
- return ref;
-}
-
-extern reference* make_reference(chunk *label, chunk *url, chunk *title)
-{
- reference *ref;
- ref = malloc(sizeof(reference));
- ref->label = normalize_reference(label);
- ref->url = clean_url(url);
- ref->title = clean_title(title);
- return ref;
-}
-
-extern void add_reference(reference** refmap, reference* ref)
-{
- reference * t = NULL;
- const char *label = (const char *)ref->label;
-
- HASH_FIND(hh, *refmap, label, strlen(label), t);
-
- if (t == NULL) {
- HASH_ADD_KEYPTR(hh, *refmap, label, strlen(label), ref);
- } else {
- free_reference(ref); // we free this now since it won't be in the refmap
- }
-}
-
static unsigned char *bufdup(const unsigned char *buf)
{
unsigned char *new = NULL;
@@ -236,26 +149,26 @@ inline static node_inl* append_inlines(node_inl* a, node_inl* b)
return a;
}
-static void subject_from_buf(subject *e, strbuf *buffer, reference** refmap)
+static void subject_from_buf(subject *e, strbuf *buffer, reference_map *refmap)
{
e->input.data = buffer->ptr;
e->input.len = buffer->size;
e->input.alloc = 0;
e->pos = 0;
e->label_nestlevel = 0;
- e->reference_map = refmap;
+ e->refmap = refmap;
chunk_rtrim(&e->input);
}
-static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap)
+static void subject_from_chunk(subject *e, chunk *chunk, reference_map *refmap)
{
e->input.data = chunk->data;
e->input.len = chunk->len;
e->input.alloc = 0;
e->pos = 0;
e->label_nestlevel = 0;
- e->reference_map = refmap;
+ e->refmap = refmap;
chunk_rtrim(&e->input);
}
@@ -325,33 +238,6 @@ static int scan_to_closing_backticks(subject* subj, int openticklength)
return (subj->pos);
}
-// Destructively modify string, collapsing consecutive
-// space and newline characters into a single space.
-static void normalize_whitespace(strbuf *s)
-{
- bool last_char_was_space = false;
- int r, w;
-
- for (r = 0, w = 0; r < s->size; ++r) {
- switch (s->ptr[r]) {
- case ' ':
- case '\n':
- if (last_char_was_space)
- break;
-
- s->ptr[w++] = ' ';
- last_char_was_space = true;
- break;
-
- default:
- s->ptr[w++] = s->ptr[r];
- last_char_was_space = false;
- }
- }
-
- strbuf_truncate(s, w);
-}
-
// Parse backtick code section or raw backticks, return an inline.
// Assumes that the subject has a backtick at the current position.
static node_inl* handle_backticks(subject *subj)
@@ -368,7 +254,7 @@ static node_inl* handle_backticks(subject *subj)
strbuf_set(&buf, subj->input.data + startpos, endpos - startpos - openticks.len);
strbuf_trim(&buf);
- normalize_whitespace(&buf);
+ strbuf_normalize_whitespace(&buf);
return make_code(chunk_buf_detach(&buf));
}
@@ -575,24 +461,9 @@ static node_inl *make_str_with_entities(chunk *content)
}
}
-// Destructively unescape a string: remove backslashes before punctuation chars.
-extern void unescape_buffer(strbuf *buf)
-{
- int r, w;
-
- for (r = 0, w = 0; r < buf->size; ++r) {
- if (buf->ptr[r] == '\\' && ispunct(buf->ptr[r + 1]))
- continue;
-
- buf->ptr[w++] = buf->ptr[r];
- }
-
- strbuf_truncate(buf, w);
-}
-
// Clean a URL: remove surrounding whitespace and surrounding <>,
// and remove \ that escape punctuation.
-static unsigned char *clean_url(chunk *url)
+unsigned char *clean_url(chunk *url)
{
strbuf buf = GH_BUF_INIT;
@@ -607,11 +478,11 @@ static unsigned char *clean_url(chunk *url)
houdini_unescape_html_f(&buf, url->data, url->len);
}
- unescape_buffer(&buf);
+ strbuf_unescape(&buf);
return strbuf_detach(&buf);
}
-static unsigned char *clean_autolink(chunk *url, int is_email)
+unsigned char *clean_autolink(chunk *url, int is_email)
{
strbuf buf = GH_BUF_INIT;
@@ -628,7 +499,7 @@ static unsigned char *clean_autolink(chunk *url, int is_email)
}
// Clean a title: remove surrounding quotes and remove \ that escape punctuation.
-static unsigned char *clean_title(chunk *title)
+unsigned char *clean_title(chunk *title)
{
strbuf buf = GH_BUF_INIT;
unsigned char first, last;
@@ -648,7 +519,7 @@ static unsigned char *clean_title(chunk *title)
houdini_unescape_html_f(&buf, title->data, title->len);
}
- unescape_buffer(&buf);
+ strbuf_unescape(&buf);
return strbuf_detach(&buf);
}
@@ -810,7 +681,7 @@ static node_inl* handle_left_bracket(subject* subj)
} else {
// if we get here, we matched a label but didn't get further:
subj->pos = endlabel;
- lab = parse_chunk_inlines(&rawlabel, subj->reference_map);
+ lab = parse_chunk_inlines(&rawlabel, subj->refmap);
result = append_inlines(make_str(chunk_literal("[")),
append_inlines(lab,
make_str(chunk_literal("]"))));
@@ -834,13 +705,13 @@ static node_inl* handle_left_bracket(subject* subj)
}
// lookup rawlabel in subject->reference_map:
- ref = lookup_reference(subj->reference_map, &reflabel);
+ ref = reference_lookup(subj->refmap, &reflabel);
if (ref != NULL) { // found
lab = parse_chunk_inlines(&rawlabel, NULL);
result = make_ref_link(lab, ref);
} else {
subj->pos = endlabel;
- lab = parse_chunk_inlines(&rawlabel, subj->reference_map);
+ lab = parse_chunk_inlines(&rawlabel, subj->refmap);
result = append_inlines(make_str(chunk_literal("[")),
append_inlines(lab, make_str(chunk_literal("]"))));
}
@@ -887,7 +758,7 @@ extern node_inl* parse_inlines_while(subject* subj, int (*f)(subject*))
return result;
}
-node_inl *parse_chunk_inlines(chunk *chunk, reference** refmap)
+node_inl *parse_chunk_inlines(chunk *chunk, reference_map *refmap)
{
subject subj;
subject_from_chunk(&subj, chunk, refmap);
@@ -987,7 +858,7 @@ static int parse_inline(subject* subj, node_inl ** last)
return 1;
}
-extern node_inl* parse_inlines(strbuf *input, reference** refmap)
+extern node_inl* parse_inlines(strbuf *input, reference_map *refmap)
{
subject subj;
subject_from_buf(&subj, input, refmap);
@@ -1009,7 +880,7 @@ void spnl(subject* subj)
// Modify refmap if a reference is encountered.
// Return 0 if no reference found, otherwise position of subject
// after reference is parsed.
-extern int parse_reference(strbuf *input, reference** refmap)
+int parse_reference_inline(strbuf *input, reference_map *refmap)
{
subject subj;
@@ -1019,7 +890,6 @@ extern int parse_reference(strbuf *input, reference** refmap)
int matchlen = 0;
int beforetitle;
- reference *new = NULL;
subject_from_buf(&subj, input, NULL);
@@ -1065,9 +935,7 @@ extern int parse_reference(strbuf *input, reference** refmap)
return 0;
}
// insert reference into refmap
- new = make_reference(&lab, &url, &title);
- add_reference(refmap, new);
-
+ reference_create(refmap, &lab, &url, &title);
return subj.pos;
}
diff --git a/src/references.c b/src/references.c
new file mode 100644
index 0000000..ff64b00
--- /dev/null
+++ b/src/references.c
@@ -0,0 +1,109 @@
+#include "stmd.h"
+#include "utf8.h"
+#include "references.h"
+
+// Hash a NUL-terminated, already-normalized reference label.
+//
+// Each step computes hash * 65599 + byte (64 + 65536 - 1 = 65599), i.e. the
+// classic sdbm string hash; unsigned arithmetic wraps on overflow.
+// link_ref must not be NULL.
+static unsigned int
+refhash(const unsigned char *link_ref)
+{
+ unsigned int hash = 0;
+
+ while (*link_ref)
+ hash = (*link_ref++) + (hash << 6) + (hash << 16) - hash;
+
+ return hash;
+}
+
+// normalize reference: collapse internal whitespace to single space,
+// remove leading/trailing whitespace, case fold
+//
+// Works on a copy: the input chunk is not modified. The steps run in the
+// order case-fold, trim, collapse. The returned buffer is detached from the
+// strbuf and owned by the caller, who releases it with free().
+static unsigned char *normalize_reference(chunk *ref)
+{
+ strbuf normalized = GH_BUF_INIT;
+
+ utf8proc_case_fold(&normalized, ref->data, ref->len);
+ strbuf_trim(&normalized);
+ strbuf_normalize_whitespace(&normalized);
+
+ return strbuf_detach(&normalized);
+}
+
+// Link `ref` at the head of its hash bucket's chain. Performs no duplicate
+// check; reference_create() rules duplicates out before calling this.
+static void add_reference(reference_map *map, reference* ref)
+{
+    ref->next = map->table[ref->hash % REFMAP_SIZE];
+    map->table[ref->hash % REFMAP_SIZE] = ref;
+}
+
+// Define a link reference in `map` from a raw label, URL and title.
+//
+// The label is normalized (case-folded, trimmed, whitespace-collapsed) and
+// the URL/title are cleaned before being stored; the map owns the result.
+//
+// The first definition of a label wins: if an entry with the same normalized
+// label already exists, nothing is added and the existing entry is returned.
+// This matches the behaviour of the uthash-based add_reference() this code
+// replaces, which discarded later duplicates.
+//
+// Returns the entry now registered for the label, or NULL if allocation of a
+// new entry fails.
+extern reference *reference_create(reference_map *map, chunk *label, chunk *url, chunk *title)
+{
+    reference *ref;
+    unsigned char *norm = normalize_reference(label);
+    unsigned int hash = refhash(norm);
+
+    // Look for an earlier definition before allocating anything.
+    for (ref = map->table[hash % REFMAP_SIZE]; ref != NULL; ref = ref->next) {
+        if (ref->hash == hash &&
+            !strcmp((char *)ref->label, (char *)norm)) {
+            free(norm);
+            return ref;
+        }
+    }
+
+    ref = malloc(sizeof(reference));
+    if (ref == NULL) {
+        free(norm);
+        return NULL;
+    }
+
+    ref->label = norm; // ownership of the normalized label moves to ref
+    ref->hash = hash;
+    ref->url = clean_url(url);
+    ref->title = clean_title(title);
+    ref->next = NULL;
+
+    add_reference(map, ref);
+
+    return ref;
+}
+
+// Returns reference if refmap contains a reference with matching
+// label, otherwise NULL.
+//
+// `label` is normalized the same way as stored labels before comparison, so
+// matching ignores case and whitespace differences. A NULL map is allowed
+// and always yields NULL. The returned entry stays owned by the map.
+reference* reference_lookup(reference_map *map, chunk *label)
+{
+ reference *ref = NULL;
+ unsigned char *norm;
+ unsigned int hash;
+
+ if (map == NULL)
+ return NULL;
+
+ norm = normalize_reference(label);
+ hash = refhash(norm);
+ ref = map->table[hash % REFMAP_SIZE];
+
+ // Walk the bucket chain; the first-byte comparison is a cheap
+ // pre-check before the full strcmp.
+ while (ref) {
+ if (ref->label[0] == norm[0] &&
+ !strcmp((char *)ref->label, (char *)norm))
+ break;
+ ref = ref->next;
+ }
+
+ // norm is a temporary copy made by normalize_reference.
+ free(norm);
+ return ref;
+}
+
+// Release one reference entry together with the label, url and title
+// strings it owns. Does not unlink the entry from any bucket chain, and
+// `ref` itself must not be NULL.
+static void reference_free(reference *ref)
+{
+ free(ref->label);
+ free(ref->url);
+ free(ref->title);
+ free(ref);
+}
+
+// Free a reference map and every reference stored in it.
+//
+// Passing NULL is a no-op, as it was for the free_reference_map() this
+// function replaces.
+void reference_map_free(reference_map *map)
+{
+    unsigned int i;
+
+    if (map == NULL)
+        return;
+
+    for (i = 0; i < REFMAP_SIZE; ++i) {
+        reference *ref = map->table[i];
+        reference *next;
+
+        // Save the link before the node is released.
+        while (ref) {
+            next = ref->next;
+            reference_free(ref);
+            ref = next;
+        }
+    }
+
+    // `table` is an array embedded in the struct, not a separate
+    // allocation: freeing it as well would free `map` twice.
+    free(map);
+}
+
+// Allocate an empty reference map with every bucket set to NULL.
+//
+// Returns NULL if the allocation fails. Release the map with
+// reference_map_free().
+reference_map *reference_map_new(void)
+{
+    // calloc zero-fills the table and, unlike malloc followed by memset,
+    // never writes through a NULL pointer when allocation fails.
+    return calloc(1, sizeof(reference_map));
+}
+
diff --git a/src/references.h b/src/references.h
new file mode 100644
index 0000000..78fffe7
--- /dev/null
+++ b/src/references.h
@@ -0,0 +1,27 @@
+#ifndef _REFERENCES_H_
+#define _REFERENCES_H_
+
+// Number of buckets in the reference hash table (fixed; chains absorb
+// collisions).
+#define REFMAP_SIZE 16
+
+// One link reference definition, stored as a node in a bucket chain.
+struct reference {
+ struct reference *next; // next entry in the same bucket, or NULL
+ unsigned char *label; // normalized label, used as the lookup key
+ unsigned char *url; // cleaned destination URL
+ unsigned char *title; // cleaned title
+ unsigned int hash; // hash of `label`; bucket index is hash % REFMAP_SIZE
+};
+
+typedef struct reference reference;
+
+// Chained hash table of references. The bucket array is embedded in the
+// struct rather than allocated separately.
+struct reference_map {
+ reference *table[REFMAP_SIZE];
+};
+
+typedef struct reference_map reference_map;
+
+// NOTE(review): `chunk` is not declared by this header; it appears to rely
+// on chunk.h having been included first (stmd.h does so) -- confirm.
+
+// Create an empty map / free a map together with all of its entries.
+reference_map *reference_map_new(void);
+void reference_map_free(reference_map *map);
+// Find the entry whose normalized label matches `label`; NULL if none or if
+// `map` is NULL.
+reference* reference_lookup(reference_map *map, chunk *label);
+// Build an entry from raw label/url/title and insert it into `map`.
+extern reference *reference_create(reference_map *map, chunk *label, chunk *url, chunk *title);
+
+#endif
diff --git a/src/stmd.h b/src/stmd.h
index 21a86b0..4e21e6c 100644
--- a/src/stmd.h
+++ b/src/stmd.h
@@ -5,7 +5,7 @@
#include
#include "buffer.h"
#include "chunk.h"
-#include "uthash.h"
+#include "references.h"
#define VERSION "0.1"
#define CODE_INDENT 4
@@ -36,17 +36,7 @@ struct node_inl {
typedef struct node_inl node_inl;
-struct reference {
- unsigned char *label;
- unsigned char *url;
- unsigned char *title;
- UT_hash_handle hh; // used by uthash
-};
-
-typedef struct reference reference;
-
// Types for blocks
-
struct ListData {
enum {
bullet,
@@ -104,7 +94,7 @@ struct node_block {
int level;
} header;
struct {
- reference** refmap;
+ reference_map *refmap;
} document;
} as;
@@ -114,14 +104,10 @@ struct node_block {
typedef struct node_block node_block;
-node_inl* parse_inlines(strbuf *input, reference** refmap);
+node_inl* parse_inlines(strbuf *input, reference_map *refmap);
void free_inlines(node_inl* e);
-int parse_reference(strbuf *input, reference** refmap);
-void free_reference(reference *ref);
-void free_reference_map(reference **refmap);
-
-void add_reference(reference** refmap, reference* ref);
+int parse_reference_inline(strbuf *input, reference_map *refmap);
void unescape_buffer(strbuf *buf);
extern node_block* make_document();
@@ -138,4 +124,8 @@ void print_blocks(node_block* blk, int indent);
void blocks_to_html(strbuf *html, node_block *b, bool tight);
void inlines_to_html(strbuf *html, node_inl *b);
+unsigned char *clean_url(chunk *url);
+unsigned char *clean_autolink(chunk *url, int is_email);
+unsigned char *clean_title(chunk *title);
+
#endif
diff --git a/src/utf8.c b/src/utf8.c
index 12d7ba5..c65aec6 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -25,7 +25,7 @@ static const int8_t utf8proc_utf8class[256] = {
static void encode_unknown(strbuf *buf)
{
- static const unsigned char repl[] = {239, 191, 189};
+ static const uint8_t repl[] = {239, 191, 189};
strbuf_put(buf, repl, 3);
}
@@ -52,9 +52,9 @@ ssize_t utf8proc_charlen(const uint8_t *str, ssize_t str_len)
return length;
}
-void utf8proc_detab(strbuf *ob, const unsigned char *line, size_t size)
+void utf8proc_detab(strbuf *ob, const uint8_t *line, size_t size)
{
- static const unsigned char whitespace[] = " ";
+ static const uint8_t whitespace[] = " ";
size_t i = 0, tab = 0;
@@ -132,7 +132,7 @@ ssize_t utf8proc_iterate(const uint8_t *str, ssize_t str_len, int32_t *dst)
void utf8proc_encode_char(int32_t uc, strbuf *buf)
{
- unsigned char dst[4];
+ uint8_t dst[4];
int len = 0;
assert(uc >= 0);
@@ -169,7 +169,7 @@ void utf8proc_encode_char(int32_t uc, strbuf *buf)
strbuf_put(buf, dst, len);
}
-void utf8proc_case_fold(strbuf *dest, const unsigned char *str, int len)
+void utf8proc_case_fold(strbuf *dest, const uint8_t *str, int len)
{
int32_t c;
diff --git a/src/utf8.h b/src/utf8.h
index 1e4e556..9506b75 100644
--- a/src/utf8.h
+++ b/src/utf8.h
@@ -1,12 +1,13 @@
#ifndef _H_STMD_UTF8_
#define _H_STMD_UTF8_
+#include <stdint.h>
#include "buffer.h"
-void utf8proc_case_fold(strbuf *dest, const unsigned char *str, int len);
+void utf8proc_case_fold(strbuf *dest, const uint8_t *str, int len);
void utf8proc_encode_char(int32_t uc, strbuf *buf);
ssize_t utf8proc_iterate(const uint8_t *str, ssize_t str_len, int32_t *dst);
ssize_t utf8proc_charlen(const uint8_t *str, ssize_t str_len);
-void utf8proc_detab(strbuf *dest, const unsigned char *line, size_t size);
+void utf8proc_detab(strbuf *dest, const uint8_t *line, size_t size);
#endif
--
cgit v1.2.3
From 7c2a062cdf9c0514cdf32f4f8bd07cf52d183c8b Mon Sep 17 00:00:00 2001
From: Vicent Marti
Date: Wed, 10 Sep 2014 19:46:34 +0200
Subject: Do not use strchr for span searches
Strchr will return a valid pointer for '\0' when searching a static
string, as the NULL byte is part of the string.
---
src/inlines.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
(limited to 'src/inlines.c')
diff --git a/src/inlines.c b/src/inlines.c
index 3040f09..cd2d124 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -767,10 +767,13 @@ node_inl *parse_chunk_inlines(chunk *chunk, reference_map *refmap)
static int subject_find_special_char(subject *subj)
{
+ static const char CHARS[] = "\n\\`&_*[]<!";
+ static const size_t CHARS_SIZE = sizeof(CHARS) - 1;
+
int n = subj->pos + 1;
while (n < subj->input.len) {
- if (strchr("\n\\`&_*[]<!", subj->input.data[n]))
+ if (memchr(CHARS, subj->input.data[n], CHARS_SIZE))
return n;
n++;
}
--
cgit v1.2.3
From 118e3d3c39242225baa876319cdbfbb1adadc77b Mon Sep 17 00:00:00 2001
From: Vicent Marti
Date: Mon, 15 Sep 2014 15:28:49 +0200
Subject: Cleanup external APIs
---
src/blocks.c | 11 ++--
src/html/html.c | 163 ++++++++++++++++++++++++++++---------------------------
src/inlines.c | 1 +
src/main.c | 8 +--
src/print.c | 114 +++++++++++++++++++-------------------
src/references.c | 1 +
src/stmd.h | 26 ++-------
7 files changed, 159 insertions(+), 165 deletions(-)
(limited to 'src/inlines.c')
diff --git a/src/blocks.c b/src/blocks.c
index 30a8284..2ac7032 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -6,8 +6,9 @@
#include "stmd.h"
#include "utf8.h"
-#include "html/houdini.h"
#include "scanners.h"
+#include "inlines.h"
+#include "html/houdini.h"
#define peek_at(i, n) (i)->data[n]
@@ -224,7 +225,7 @@ static void finalize(node_block* b, int line_number)
}
// Add a node_block as child of another. Return pointer to child.
-extern node_block* add_child(node_block* parent,
+static node_block* add_child(node_block* parent,
int block_type, int start_line, int start_column)
{
assert(parent);
@@ -252,7 +253,7 @@ extern node_block* add_child(node_block* parent,
}
// Free a node_block list and any children.
-extern void free_blocks(node_block* e)
+void stmd_free_nodes(node_block *e)
{
node_block * next;
while (e != NULL) {
@@ -264,7 +265,7 @@ extern void free_blocks(node_block* e)
} else if (e->tag == BLOCK_DOCUMENT) {
reference_map_free(e->as.document.refmap);
}
- free_blocks(e->children);
+ stmd_free_nodes(e->children);
free(e);
e = next;
}
@@ -279,8 +280,6 @@ void process_inlines(node_block* cur, reference_map *refmap)
case BLOCK_ATX_HEADER:
case BLOCK_SETEXT_HEADER:
cur->inline_content = parse_inlines(&cur->string_content, refmap);
- // MEM
- // strbuf_free(&cur->string_content);
break;
default:
diff --git a/src/html/html.c b/src/html/html.c
index b48b10b..6f3bc76 100644
--- a/src/html/html.c
+++ b/src/html/html.c
@@ -32,8 +32,89 @@ static inline void cr(strbuf *html)
strbuf_putc(html, '\n');
}
+// Convert an inline list to HTML. Returns 0 on success, and sets result.
+static void inlines_to_html(strbuf *html, node_inl* ils)
+{
+ strbuf scrap = GH_BUF_INIT;
+
+ while(ils != NULL) {
+ switch(ils->tag) {
+ case INL_STRING:
+ escape_html(html, ils->content.literal.data, ils->content.literal.len);
+ break;
+
+ case INL_LINEBREAK:
+ strbuf_puts(html, "
\n");
+ break;
+
+ case INL_SOFTBREAK:
+ strbuf_putc(html, '\n');
+ break;
+
+ case INL_CODE:
+ strbuf_puts(html, "");
+ escape_html(html, ils->content.literal.data, ils->content.literal.len);
+ strbuf_puts(html, "
");
+ break;
+
+ case INL_RAW_HTML:
+ strbuf_put(html,
+ ils->content.literal.data,
+ ils->content.literal.len);
+ break;
+
+ case INL_LINK:
+ strbuf_puts(html, "content.linkable.url)
+ escape_href(html, ils->content.linkable.url, -1);
+
+ if (ils->content.linkable.title) {
+ strbuf_puts(html, "\" title=\"");
+ escape_html(html, ils->content.linkable.title, -1);
+ }
+
+ strbuf_puts(html, "\">");
+ inlines_to_html(html, ils->content.inlines);
+ strbuf_puts(html, "");
+ break;
+
+ case INL_IMAGE:
+ strbuf_puts(html, "
content.linkable.url)
+ escape_href(html, ils->content.linkable.url, -1);
+
+ inlines_to_html(&scrap, ils->content.inlines);
+ strbuf_puts(html, "\" alt=\"");
+ if (scrap.size)
+ escape_html(html, scrap.ptr, scrap.size);
+ strbuf_clear(&scrap);
+
+ if (ils->content.linkable.title) {
+ strbuf_puts(html, "\" title=\"");
+ escape_html(html, ils->content.linkable.title, -1);
+ }
+
+ strbuf_puts(html, "\"/>");
+ break;
+
+ case INL_STRONG:
+ strbuf_puts(html, "");
+ inlines_to_html(html, ils->content.inlines);
+ strbuf_puts(html, "");
+ break;
+
+ case INL_EMPH:
+ strbuf_puts(html, "");
+ inlines_to_html(html, ils->content.inlines);
+ strbuf_puts(html, "");
+ break;
+ }
+ ils = ils->next;
+ }
+}
+
// Convert a node_block list to HTML. Returns 0 on success, and sets result.
-void blocks_to_html(strbuf *html, node_block *b, bool tight)
+static void blocks_to_html(strbuf *html, node_block *b, bool tight)
{
struct ListData *data;
@@ -139,83 +220,7 @@ void blocks_to_html(strbuf *html, node_block *b, bool tight)
}
}
-// Convert an inline list to HTML. Returns 0 on success, and sets result.
-void inlines_to_html(strbuf *html, node_inl* ils)
+void stmd_render_html(strbuf *html, node_block *root)
{
- strbuf scrap = GH_BUF_INIT;
-
- while(ils != NULL) {
- switch(ils->tag) {
- case INL_STRING:
- escape_html(html, ils->content.literal.data, ils->content.literal.len);
- break;
-
- case INL_LINEBREAK:
- strbuf_puts(html, "<br />\n");
- break;
-
- case INL_SOFTBREAK:
- strbuf_putc(html, '\n');
- break;
-
- case INL_CODE:
- strbuf_puts(html, "<code>");
- escape_html(html, ils->content.literal.data, ils->content.literal.len);
- strbuf_puts(html, "</code>");
- break;
-
- case INL_RAW_HTML:
- strbuf_put(html,
- ils->content.literal.data,
- ils->content.literal.len);
- break;
-
- case INL_LINK:
- strbuf_puts(html, "<a href=\"");
- if (ils->content.linkable.url)
- escape_href(html, ils->content.linkable.url, -1);
-
- if (ils->content.linkable.title) {
- strbuf_puts(html, "\" title=\"");
- escape_html(html, ils->content.linkable.title, -1);
- }
-
- strbuf_puts(html, "\">");
- inlines_to_html(html, ils->content.inlines);
- strbuf_puts(html, "</a>");
- break;
-
- case INL_IMAGE:
- strbuf_puts(html, "<img src=\"");
- if (ils->content.linkable.url)
- escape_href(html, ils->content.linkable.url, -1);
-
- inlines_to_html(&scrap, ils->content.inlines);
- strbuf_puts(html, "\" alt=\"");
- if (scrap.size)
- escape_html(html, scrap.ptr, scrap.size);
- strbuf_clear(&scrap);
-
- if (ils->content.linkable.title) {
- strbuf_puts(html, "\" title=\"");
- escape_html(html, ils->content.linkable.title, -1);
- }
-
- strbuf_puts(html, "\"/>");
- break;
-
- case INL_STRONG:
- strbuf_puts(html, "<strong>");
- inlines_to_html(html, ils->content.inlines);
- strbuf_puts(html, "</strong>");
- break;
-
- case INL_EMPH:
- strbuf_puts(html, "<em>");
- inlines_to_html(html, ils->content.inlines);
- strbuf_puts(html, "</em>");
- break;
- }
- ils = ils->next;
- }
+ blocks_to_html(html, root, false);
}
diff --git a/src/inlines.c b/src/inlines.c
index cd2d124..145825c 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -8,6 +8,7 @@
#include "html/houdini.h"
#include "utf8.h"
#include "scanners.h"
+#include "inlines.h"
typedef struct Subject {
chunk input;
diff --git a/src/main.c b/src/main.c
index 90bb16d..76a0e12 100644
--- a/src/main.c
+++ b/src/main.c
@@ -17,9 +17,9 @@ static void print_document(node_block *document, bool ast)
strbuf html = GH_BUF_INIT;
if (ast) {
- print_blocks(document, 0);
+ stmd_debug_print(document);
} else {
- blocks_to_html(&html, document, false);
+ stmd_render_html(&html, document);
printf("%s", html.ptr);
strbuf_free(&html);
}
@@ -54,7 +54,7 @@ int main(int argc, char *argv[])
if (numfps == 0) {
document = stmd_parse_file(stdin);
print_document(document, ast);
- free_blocks(document);
+ stmd_free_nodes(document);
} else {
for (i = 0; i < numfps; i++) {
FILE *fp = fopen(argv[files[i]], "r");
@@ -67,7 +67,7 @@ int main(int argc, char *argv[])
document = stmd_parse_file(fp);
print_document(document, ast);
- free_blocks(document);
+ stmd_free_nodes(document);
fclose(fp);
}
}
diff --git a/src/print.c b/src/print.c
index 36140a8..83f8daa 100644
--- a/src/print.c
+++ b/src/print.c
@@ -32,14 +32,69 @@ static void print_str(const unsigned char *s, int len)
putchar('"');
}
+// Prettyprint an inline list, for debugging.
+static void print_inlines(node_inl* ils, int indent)
+{
+ while(ils != NULL) {
+ for (int i=0; i < indent; i++) {
+ putchar(' ');
+ }
+ switch(ils->tag) {
+ case INL_STRING:
+ printf("str ");
+ print_str(ils->content.literal.data, ils->content.literal.len);
+ putchar('\n');
+ break;
+ case INL_LINEBREAK:
+ printf("linebreak\n");
+ break;
+ case INL_SOFTBREAK:
+ printf("softbreak\n");
+ break;
+ case INL_CODE:
+ printf("code ");
+ print_str(ils->content.literal.data, ils->content.literal.len);
+ putchar('\n');
+ break;
+ case INL_RAW_HTML:
+ printf("html ");
+ print_str(ils->content.literal.data, ils->content.literal.len);
+ putchar('\n');
+ break;
+ case INL_LINK:
+ case INL_IMAGE:
+ printf("%s url=", ils->tag == INL_LINK ? "link" : "image");
+
+ if (ils->content.linkable.url)
+ print_str(ils->content.linkable.url, -1);
+
+ if (ils->content.linkable.title) {
+ printf(" title=");
+ print_str(ils->content.linkable.title, -1);
+ }
+ putchar('\n');
+ print_inlines(ils->content.linkable.label, indent + 2);
+ break;
+ case INL_STRONG:
+ printf("strong\n");
+ print_inlines(ils->content.linkable.label, indent + 2);
+ break;
+ case INL_EMPH:
+ printf("emph\n");
+ print_inlines(ils->content.linkable.label, indent + 2);
+ break;
+ }
+ ils = ils->next;
+ }
+}
+
// Functions to pretty-print inline and node_block lists, for debugging.
// Prettyprint an inline list, for debugging.
-extern void print_blocks(node_block* b, int indent)
+static void print_blocks(node_block* b, int indent)
{
struct ListData *data;
while(b != NULL) {
- // printf("%3d %3d %3d| ", b->start_line, b->start_column, b->end_line);
for (int i=0; i < indent; i++) {
putchar(' ');
}
@@ -115,58 +170,7 @@ extern void print_blocks(node_block* b, int indent)
}
}
-// Prettyprint an inline list, for debugging.
-extern void print_inlines(node_inl* ils, int indent)
+void stmd_debug_print(node_block *root)
{
- while(ils != NULL) {
- for (int i=0; i < indent; i++) {
- putchar(' ');
- }
- switch(ils->tag) {
- case INL_STRING:
- printf("str ");
- print_str(ils->content.literal.data, ils->content.literal.len);
- putchar('\n');
- break;
- case INL_LINEBREAK:
- printf("linebreak\n");
- break;
- case INL_SOFTBREAK:
- printf("softbreak\n");
- break;
- case INL_CODE:
- printf("code ");
- print_str(ils->content.literal.data, ils->content.literal.len);
- putchar('\n');
- break;
- case INL_RAW_HTML:
- printf("html ");
- print_str(ils->content.literal.data, ils->content.literal.len);
- putchar('\n');
- break;
- case INL_LINK:
- case INL_IMAGE:
- printf("%s url=", ils->tag == INL_LINK ? "link" : "image");
-
- if (ils->content.linkable.url)
- print_str(ils->content.linkable.url, -1);
-
- if (ils->content.linkable.title) {
- printf(" title=");
- print_str(ils->content.linkable.title, -1);
- }
- putchar('\n');
- print_inlines(ils->content.linkable.label, indent + 2);
- break;
- case INL_STRONG:
- printf("strong\n");
- print_inlines(ils->content.linkable.label, indent + 2);
- break;
- case INL_EMPH:
- printf("emph\n");
- print_inlines(ils->content.linkable.label, indent + 2);
- break;
- }
- ils = ils->next;
- }
+ print_blocks(root, 0);
}
diff --git a/src/references.c b/src/references.c
index 300bbcc..3e54b48 100644
--- a/src/references.c
+++ b/src/references.c
@@ -1,6 +1,7 @@
#include "stmd.h"
#include "utf8.h"
#include "references.h"
+#include "inlines.h"
static unsigned int
refhash(const unsigned char *link_ref)
diff --git a/src/stmd.h b/src/stmd.h
index 4e21e6c..c6473a6 100644
--- a/src/stmd.h
+++ b/src/stmd.h
@@ -104,28 +104,12 @@ struct node_block {
typedef struct node_block node_block;
-node_inl* parse_inlines(strbuf *input, reference_map *refmap);
-void free_inlines(node_inl* e);
+node_block *stmd_parse_document(const unsigned char *buffer, size_t len);
+node_block *stmd_parse_file(FILE *f);
-int parse_reference_inline(strbuf *input, reference_map *refmap);
-void unescape_buffer(strbuf *buf);
+void stmd_free_nodes(node_block *e);
-extern node_block* make_document();
-extern node_block* add_child(node_block* parent,
- int block_type, int start_line, int start_column);
-void free_blocks(node_block* e);
-
-extern node_block *stmd_parse_document(const unsigned char *buffer, size_t len);
-extern node_block *stmd_parse_file(FILE *f);
-
-void print_inlines(node_inl* ils, int indent);
-void print_blocks(node_block* blk, int indent);
-
-void blocks_to_html(strbuf *html, node_block *b, bool tight);
-void inlines_to_html(strbuf *html, node_inl *b);
-
-unsigned char *clean_url(chunk *url);
-unsigned char *clean_autolink(chunk *url, int is_email);
-unsigned char *clean_title(chunk *title);
+void stmd_debug_print(node_block *root);
+void stmd_render_html(strbuf *html, node_block *root);
#endif
--
cgit v1.2.3
From 2943b3850c5cb9e4561c3d109b4513a123bf4db7 Mon Sep 17 00:00:00 2001
From: Jordan Milne
Date: Thu, 18 Sep 2014 17:21:12 -0300
Subject: Use a lookup table for subject_find_special_char
---
src/inlines.c | 22 +++++++++++++++++++---
1 file changed, 19 insertions(+), 3 deletions(-)
(limited to 'src/inlines.c')
diff --git a/src/inlines.c b/src/inlines.c
index 145825c..71d75e9 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -768,13 +768,29 @@ node_inl *parse_chunk_inlines(chunk *chunk, reference_map *refmap)
static int subject_find_special_char(subject *subj)
{
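- static const char CHARS[] = "\n\\`&_*[]<!";
- static const size_t CHARS_SIZE = sizeof(CHARS) - 1;
+ static const int8_t SPECIAL_CHARS[256] = {
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1,
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ };
int n = subj->pos + 1;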
while (n < subj->input.len) {
- if (memchr(CHARS, subj->input.data[n], CHARS_SIZE))
+ if (SPECIAL_CHARS[subj->input.data[n]])
return n;
n++;
}
--
cgit v1.2.3