From e216094e2192c05ddbd0988458eb8c0012e7baf8 Mon Sep 17 00:00:00 2001
From: Vicent Marti
Date: Tue, 2 Sep 2014 01:10:54 +0200
Subject: lol
---
src/buffer.h | 119 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 119 insertions(+)
create mode 100644 src/buffer.h
(limited to 'src/buffer.h')
diff --git a/src/buffer.h b/src/buffer.h
new file mode 100644
index 0000000..2581ee3
--- /dev/null
+++ b/src/buffer.h
@@ -0,0 +1,119 @@
+#ifndef INCLUDE_buffer_h__
+#define INCLUDE_buffer_h__
+
+#include
+#include
+#include
+#include
+
+typedef struct {
+ unsigned char *ptr;
+ int asize, size;
+} gh_buf;
+
+extern unsigned char gh_buf__initbuf[];
+extern unsigned char gh_buf__oom[];
+
+#define GH_BUF_INIT { gh_buf__initbuf, 0, 0 }
+
+/**
+ * Initialize a gh_buf structure.
+ *
+ * For the cases where GH_BUF_INIT cannot be used to do static
+ * initialization.
+ */
+extern void gh_buf_init(gh_buf *buf, int initial_size);
+
+static inline void gh_buf_static(gh_buf *buf, unsigned char *source)
+{
+ buf->ptr = source;
+ buf->size = strlen(source);
+ buf->asize = -1;
+}
+
+/**
+ * Attempt to grow the buffer to hold at least `target_size` bytes.
+ *
+ * If the allocation fails, this will return an error. If mark_oom is true,
+ * this will mark the buffer as invalid for future operations; if false,
+ * existing buffer content will be preserved, but calling code must handle
+ * that buffer was not expanded.
+ */
+extern int gh_buf_try_grow(gh_buf *buf, int target_size, bool mark_oom);
+
+/**
+ * Grow the buffer to hold at least `target_size` bytes.
+ *
+ * If the allocation fails, this will return an error and the buffer will be
+ * marked as invalid for future operations, invaliding contents.
+ *
+ * @return 0 on success or -1 on failure
+ */
+static inline int gh_buf_grow(gh_buf *buf, int target_size)
+{
+ return gh_buf_try_grow(buf, target_size, true);
+}
+
+extern void gh_buf_free(gh_buf *buf);
+extern void gh_buf_swap(gh_buf *buf_a, gh_buf *buf_b);
+
+/**
+ * Test if there have been any reallocation failures with this gh_buf.
+ *
+ * Any function that writes to a gh_buf can fail due to memory allocation
+ * issues. If one fails, the gh_buf will be marked with an OOM error and
+ * further calls to modify the buffer will fail. Check gh_buf_oom() at the
+ * end of your sequence and it will be true if you ran out of memory at any
+ * point with that buffer.
+ *
+ * @return false if no error, true if allocation error
+ */
+static inline bool gh_buf_oom(const gh_buf *buf)
+{
+ return (buf->ptr == gh_buf__oom);
+}
+
+
+static inline size_t gh_buf_len(const gh_buf *buf)
+{
+ return buf->size;
+}
+
+extern int gh_buf_cmp(const gh_buf *a, const gh_buf *b);
+
+extern void gh_buf_attach(gh_buf *buf, char *ptr, int asize);
+extern char *gh_buf_detach(gh_buf *buf);
+extern void gh_buf_copy_cstr(char *data, int datasize, const gh_buf *buf);
+
+static inline const char *gh_buf_cstr(const gh_buf *buf)
+{
+ return buf->ptr;
+}
+
+#define gh_buf_at(buf, n) ((buf)->ptr[n])
+
+/*
+ * Functions below that return int value error codes will return 0 on
+ * success or -1 on failure (which generally means an allocation failed).
+ * Using a gh_buf where the allocation has failed with result in -1 from
+ * all further calls using that buffer. As a result, you can ignore the
+ * return code of these functions and call them in a series then just call
+ * gh_buf_oom at the end.
+ */
+extern int gh_buf_set(gh_buf *buf, const char *data, int len);
+extern int gh_buf_sets(gh_buf *buf, const char *string);
+extern int gh_buf_putc(gh_buf *buf, char c);
+extern int gh_buf_put(gh_buf *buf, const char *data, int len);
+extern int gh_buf_puts(gh_buf *buf, const char *string);
+extern int gh_buf_printf(gh_buf *buf, const char *format, ...)
+ __attribute__((format (printf, 2, 3)));
+extern int gh_buf_vprintf(gh_buf *buf, const char *format, va_list ap);
+extern void gh_buf_clear(gh_buf *buf);
+
+int gh_buf_strchr(const gh_buf *buf, int c, int pos);
+int gh_buf_strrchr(const gh_buf *buf, int c, int pos);
+void gh_buf_truncate(gh_buf *buf, int len);
+void gh_buf_ltruncate(gh_buf *buf, int len);
+void gh_buf_trim(gh_buf *buf);
+
+#endif
--
cgit v1.2.3
From 582674e662d1f8757350c51486a5e0a837195e15 Mon Sep 17 00:00:00 2001
From: Vicent Marti
Date: Tue, 2 Sep 2014 13:18:04 +0200
Subject: ffffix
---
Makefile | 11 ++-
src/blocks.c | 58 +++++++----
src/buffer.c | 69 +++++--------
src/buffer.h | 19 ++--
src/html.c | 276 ----------------------------------------------------
src/inlines.c | 4 +-
src/main.c | 142 ++++++++++++---------------
src/print.c | 307 ++++++++++++++++++++++++++++++----------------------------
src/stmd.h | 13 +--
src/utf8.c | 6 +-
src/utf8.h | 6 --
11 files changed, 304 insertions(+), 607 deletions(-)
delete mode 100644 src/html.c
delete mode 100644 src/utf8.h
(limited to 'src/buffer.h')
diff --git a/Makefile b/Makefile
index cb5938d..d14a928 100644
--- a/Makefile
+++ b/Makefile
@@ -6,7 +6,7 @@ DATADIR=data
PROG=./stmd
.PHONY: all oldtests test spec benchjs testjs
-all: $(SRCDIR)/case_fold_switch.c $(PROG)
+all: $(SRCDIR)/case_fold_switch.inc $(PROG)
README.html: README.md template.html
pandoc --template template.html -S -s -t html5 -o $@ $<
@@ -41,13 +41,16 @@ testjs: spec.txt
benchjs:
node js/bench.js ${BENCHINP}
-$(PROG): $(SRCDIR)/main.c $(SRCDIR)/inlines.o $(SRCDIR)/buffer.o $(SRCDIR)/blocks.o $(SRCDIR)/scanners.c $(SRCDIR)/print.o $(SRCDIR)/html.o $(SRCDIR)/utf8.o
+HTML_OBJ=$(SRCDIR)/html/html.o $(SRCDIR)/html/houdini_href_e.o $(SRCDIR)/html/houdini_html_e.o
+STMD_OBJ=$(SRCDIR)/inlines.o $(SRCDIR)/buffer.o $(SRCDIR)/blocks.o $(SRCDIR)/scanners.c $(SRCDIR)/print.o $(SRCDIR)/utf8.o
+
+$(PROG): $(SRCDIR)/main.c $(HTML_OBJ) $(STMD_OBJ)
$(CC) $(LDFLAGS) -o $@ $^
$(SRCDIR)/scanners.c: $(SRCDIR)/scanners.re
re2c --case-insensitive -bis $< > $@ || (rm $@ && false)
-$(SRCDIR)/case_fold_switch.inc $(DATADIR)/CaseFolding-3.2.0.txt
+$(SRCDIR)/case_fold_switch.inc: $(DATADIR)/CaseFolding-3.2.0.txt
perl mkcasefold.pl < $< > $@
.PHONY: leakcheck clean fuzztest dingus upload
@@ -72,7 +75,7 @@ update-site: spec.html narrative.html
(cd _site ; git pull ; git commit -a -m "Updated site for latest spec, narrative, js" ; git push; cd ..)
clean:
- -rm -f test $(SRCDIR)/*.o $(SRCDIR)/scanners.c
+ -rm -f test $(SRCDIR)/*.o $(SRCDIR)/scanners.c $(SRCDIR)/html/*.o
-rm -rf *.dSYM
-rm -f README.html
-rm -f spec.md fuzz.txt spec.html
diff --git a/src/blocks.c b/src/blocks.c
index eabac03..71dc830 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -3,11 +3,12 @@
#include
#include
#include
-#include "bstrlib.h"
+
#include "stmd.h"
-#include "uthash.h"
-#include "debug.h"
#include "scanners.h"
+#include "uthash.h"
+
+static void finalize(block* b, int line_number);
static block* make_block(int tag, int start_line, int start_column)
{
@@ -140,7 +141,7 @@ static int break_out_of_lists(block ** bptr, int line_number)
}
-extern void finalize(block* b, int line_number)
+static void finalize(block* b, int line_number)
{
int firstlinelen;
int pos;
@@ -364,7 +365,7 @@ static int lists_match(struct ListData list_data,
list_data.bullet_char == item_data.bullet_char);
}
-static void expand_tabs(gh_buf *ob, const char *line, size_t size)
+static void expand_tabs(gh_buf *ob, const unsigned char *line, size_t size)
{
size_t i = 0, tab = 0;
@@ -389,13 +390,43 @@ static void expand_tabs(gh_buf *ob, const char *line, size_t size)
}
}
-extern block *stmd_parse_document(const char *buffer, size_t len)
+static block *finalize_parsing(block *document, int linenum)
{
- gh_buf line = GH_BUF_INIT;
+ while (document != document->top) {
+ finalize(document, linenum);
+ document = document->parent;
+ }
+
+ finalize(document, linenum);
+ process_inlines(document, document->attributes.refmap);
+
+ return document;
+}
+extern block *stmd_parse_file(FILE *f)
+{
+ gh_buf line = GH_BUF_INIT;
+ unsigned char buffer[4096];
+ int linenum = 1;
block *document = make_document();
+
+ while (fgets((char *)buffer, sizeof(buffer), f)) {
+ expand_tabs(&line, buffer, strlen(buffer));
+ incorporate_line(&line, linenum, &document);
+ gh_buf_clear(&line);
+ linenum++;
+ }
+
+ gh_buf_free(&line);
+ return finalize_document(document, linenum);
+}
+
+extern block *stmd_parse_document(const unsigned char *buffer, size_t len)
+{
+ gh_buf line = GH_BUF_INIT;
int linenum = 1;
- const char *end = buffer + len;
+ const unsigned char *end = buffer + len;
+ block *document = make_document();
while (buffer < end) {
const char *eol = memchr(buffer, '\n', end - buffer);
@@ -414,16 +445,7 @@ extern block *stmd_parse_document(const char *buffer, size_t len)
}
gh_buf_free(&line);
-
- while (document != document->top) {
- finalize(document, linenum);
- document = document->parent;
- }
-
- finalize(document, linenum);
- process_inlines(document, document->attributes.refmap);
-
- return document;
+ return finalize_document(document, linenum);
}
// Process one line at a time, modifying a block.
diff --git a/src/buffer.c b/src/buffer.c
index b81e7fa..17dc864 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -31,10 +31,10 @@ void gh_buf_init(gh_buf *buf, int initial_size)
int gh_buf_try_grow(gh_buf *buf, int target_size, bool mark_oom)
{
- char *new_ptr;
- size_t new_size;
+ unsigned char *new_ptr;
+ int new_size;
- if (buf->ptr == gh_buf__oom || buf->asize < 0)
+ if (buf->ptr == gh_buf__oom)
return -1;
if (target_size <= buf->asize)
@@ -79,7 +79,7 @@ void gh_buf_free(gh_buf *buf)
{
if (!buf) return;
- if (buf->asize > 0 && buf->ptr != gh_buf__initbuf && buf->ptr != gh_buf__oom)
+ if (buf->ptr != gh_buf__initbuf && buf->ptr != gh_buf__oom)
free(buf->ptr);
gh_buf_init(buf, 0);
@@ -91,14 +91,9 @@ void gh_buf_clear(gh_buf *buf)
if (buf->asize > 0)
buf->ptr[0] = '\0';
-
- if (buf->asize < 0) {
- buf->ptr = gh_buf__initbuf;
- buf->asize = 0;
- }
}
-int gh_buf_set(gh_buf *buf, const char *data, int len)
+int gh_buf_set(gh_buf *buf, const unsigned char *data, int len)
{
if (len == 0 || data == NULL) {
gh_buf_clear(buf);
@@ -115,10 +110,12 @@ int gh_buf_set(gh_buf *buf, const char *data, int len)
int gh_buf_sets(gh_buf *buf, const char *string)
{
- return gh_buf_set(buf, string, string ? strlen(string) : 0);
+ return gh_buf_set(buf,
+ (const unsigned char *)string,
+ string ? strlen(string) : 0);
}
-int gh_buf_putc(gh_buf *buf, char c)
+int gh_buf_putc(gh_buf *buf, int c)
{
ENSURE_SIZE(buf, buf->size + 2);
buf->ptr[buf->size++] = c;
@@ -126,7 +123,7 @@ int gh_buf_putc(gh_buf *buf, char c)
return 0;
}
-int gh_buf_put(gh_buf *buf, const char *data, int len)
+int gh_buf_put(gh_buf *buf, const unsigned char *data, int len)
{
ENSURE_SIZE(buf, buf->size + len + 1);
memmove(buf->ptr + buf->size, data, len);
@@ -137,8 +134,7 @@ int gh_buf_put(gh_buf *buf, const char *data, int len)
int gh_buf_puts(gh_buf *buf, const char *string)
{
- assert(string);
- return gh_buf_put(buf, string, strlen(string));
+ return gh_buf_put(buf, (const unsigned char *)string, strlen(string));
}
int gh_buf_vprintf(gh_buf *buf, const char *format, va_list ap)
@@ -153,7 +149,7 @@ int gh_buf_vprintf(gh_buf *buf, const char *format, va_list ap)
va_copy(args, ap);
len = vsnprintf(
- buf->ptr + buf->size,
+ (char *)buf->ptr + buf->size,
buf->asize - buf->size,
format, args
);
@@ -187,9 +183,9 @@ int gh_buf_printf(gh_buf *buf, const char *format, ...)
return r;
}
-void gh_buf_copy_cstr(char *data, size_t datasize, const gh_buf *buf)
+void gh_buf_copy_cstr(char *data, int datasize, const gh_buf *buf)
{
- size_t copylen;
+ int copylen;
assert(data && datasize && buf);
@@ -212,9 +208,9 @@ void gh_buf_swap(gh_buf *buf_a, gh_buf *buf_b)
*buf_b = t;
}
-char *gh_buf_detach(gh_buf *buf)
+unsigned char *gh_buf_detach(gh_buf *buf)
{
- char *data = buf->ptr;
+ unsigned char *data = buf->ptr;
if (buf->asize == 0 || buf->ptr == gh_buf__oom)
return NULL;
@@ -224,13 +220,13 @@ char *gh_buf_detach(gh_buf *buf)
return data;
}
-void gh_buf_attach(gh_buf *buf, char *ptr, int asize)
+void gh_buf_attach(gh_buf *buf, unsigned char *ptr, int asize)
{
gh_buf_free(buf);
if (ptr) {
buf->ptr = ptr;
- buf->size = strlen(ptr);
+ buf->size = strlen((char *)ptr);
if (asize)
buf->asize = (asize < buf->size) ? buf->size + 1 : asize;
else /* pass 0 to fall back on strlen + 1 */
@@ -249,11 +245,11 @@ int gh_buf_cmp(const gh_buf *a, const gh_buf *b)
int gh_buf_strchr(const gh_buf *buf, int c, int pos)
{
- const char *p = memchr(buf->ptr + pos, c, buf->size - pos);
- if (!p)
- return -1;
+ const char *p = memchr(buf->ptr + pos, c, buf->size - pos);
+ if (!p)
+ return -1;
- return (int)(p - p->ptr);
+ return (int)(p - buf->ptr);
}
int gh_buf_strrchr(const gh_buf *buf, int c, int pos)
@@ -270,36 +266,21 @@ int gh_buf_strrchr(const gh_buf *buf, int c, int pos)
void gh_buf_truncate(gh_buf *buf, size_t len)
{
- assert(buf->asize >= 0);
-
if (len < buf->size) {
buf->size = len;
buf->ptr[buf->size] = '\0';
}
}
-void gh_buf_ltruncate(gh_buf *buf, size_t len)
-{
- assert(buf->asize >= 0);
-
- if (len && len < buf->size) {
- memmove(buf->ptr, buf->ptr + len, buf->size - len);
- buf->size -= len;
- buf->ptr[buf->size] = '\0';
- }
-}
-
void gh_buf_trim(gh_buf *buf)
{
- size_t i = 0;
-
- assert(buf->asize >= 0);
-
- /* ltrim */
+ /* TODO: leading whitespace? */
+ /*
while (i < buf->size && isspace(buf->ptr[i]))
i++;
gh_buf_truncate(buf, i);
+ */
/* rtrim */
while (buf->size > 0) {
diff --git a/src/buffer.h b/src/buffer.h
index 2581ee3..422ef02 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -24,13 +24,6 @@ extern unsigned char gh_buf__oom[];
*/
extern void gh_buf_init(gh_buf *buf, int initial_size);
-static inline void gh_buf_static(gh_buf *buf, unsigned char *source)
-{
- buf->ptr = source;
- buf->size = strlen(source);
- buf->asize = -1;
-}
-
/**
* Attempt to grow the buffer to hold at least `target_size` bytes.
*
@@ -81,13 +74,13 @@ static inline size_t gh_buf_len(const gh_buf *buf)
extern int gh_buf_cmp(const gh_buf *a, const gh_buf *b);
-extern void gh_buf_attach(gh_buf *buf, char *ptr, int asize);
-extern char *gh_buf_detach(gh_buf *buf);
+extern void gh_buf_attach(gh_buf *buf, unsigned char *ptr, int asize);
+extern unsigned char *gh_buf_detach(gh_buf *buf);
extern void gh_buf_copy_cstr(char *data, int datasize, const gh_buf *buf);
static inline const char *gh_buf_cstr(const gh_buf *buf)
{
- return buf->ptr;
+ return (char *)buf->ptr;
}
#define gh_buf_at(buf, n) ((buf)->ptr[n])
@@ -100,10 +93,10 @@ static inline const char *gh_buf_cstr(const gh_buf *buf)
* return code of these functions and call them in a series then just call
* gh_buf_oom at the end.
*/
-extern int gh_buf_set(gh_buf *buf, const char *data, int len);
+extern int gh_buf_set(gh_buf *buf, const unsigned char *data, int len);
extern int gh_buf_sets(gh_buf *buf, const char *string);
-extern int gh_buf_putc(gh_buf *buf, char c);
-extern int gh_buf_put(gh_buf *buf, const char *data, int len);
+extern int gh_buf_putc(gh_buf *buf, int c);
+extern int gh_buf_put(gh_buf *buf, const unsigned char *data, int len);
extern int gh_buf_puts(gh_buf *buf, const char *string);
extern int gh_buf_printf(gh_buf *buf, const char *format, ...)
__attribute__((format (printf, 2, 3)));
diff --git a/src/html.c b/src/html.c
deleted file mode 100644
index aeec5f1..0000000
--- a/src/html.c
+++ /dev/null
@@ -1,276 +0,0 @@
-#include
-#include
-#include
-#include "bstrlib.h"
-#include "stmd.h"
-#include "debug.h"
-#include "scanners.h"
-
-// Functions to convert block and inline lists to HTML strings.
-
-// Escape special characters in HTML. More efficient than
-// three calls to bfindreplace. If preserve_entities is set,
-// existing entities are left alone.
-static bstring escape_html(bstring inp, bool preserve_entities)
-{
- int pos = 0;
- int match;
- char c;
- bstring escapable = blk2bstr("&<>\"", 4);
- bstring ent;
- bstring s = bstrcpy(inp);
- while ((pos = binchr(s, pos, escapable)) != BSTR_ERR) {
- c = bchar(s,pos);
- switch (c) {
- case '<':
- bdelete(s, pos, 1);
- ent = blk2bstr("<", 4);
- binsert(s, pos, ent, ' ');
- bdestroy(ent);
- pos += 4;
- break;
- case '>':
- bdelete(s, pos, 1);
- ent = blk2bstr(">", 4);
- binsert(s, pos, ent, ' ');
- bdestroy(ent);
- pos += 4;
- break;
- case '&':
- if (preserve_entities && (match = scan_entity(s, pos))) {
- pos += match;
- } else {
- bdelete(s, pos, 1);
- ent = blk2bstr("&", 5);
- binsert(s, pos, ent, ' ');
- bdestroy(ent);
- pos += 5;
- }
- break;
- case '"':
- bdelete(s, pos, 1);
- ent = blk2bstr(""", 6);
- binsert(s, pos, ent, ' ');
- bdestroy(ent);
- pos += 6;
- break;
- default:
- bdelete(s, pos, 1);
- log_err("unexpected character %02x", c);
- }
- }
- bdestroy(escapable);
- return s;
-}
-
-static inline void cr(bstring buffer)
-{
- int c = bchar(buffer, blength(buffer) - 1);
- if (c != '\n' && c) {
- bconchar(buffer, '\n');
- }
-}
-
-// Convert a block list to HTML. Returns 0 on success, and sets result.
-extern int blocks_to_html(block* b, bstring* result, bool tight)
-{
- bstring contents = NULL;
- bstring escaped, escaped2;
- struct bstrList * info_words;
- struct ListData * data;
- bstring mbstart;
- bstring html = blk2bstr("", 0);
-
- while(b != NULL) {
- switch(b->tag) {
- case document:
- check(blocks_to_html(b->children, &contents, false) == 0,
- "error converting blocks to html");
- bformata(html, "%s", contents->data);
- bdestroy(contents);
- break;
- case paragraph:
- check(inlines_to_html(b->inline_content, &contents) == 0,
- "error converting inlines to html");
- if (tight) {
- bformata(html, "%s", contents->data);
- } else {
- cr(html);
- bformata(html, "%s
", contents->data);
- cr(html);
- }
- bdestroy(contents);
- break;
- case block_quote:
- check(blocks_to_html(b->children, &contents, false) == 0,
- "error converting blocks to html");
- cr(html);
- bformata(html, "\n%s
", contents->data);
- cr(html);
- bdestroy(contents);
- break;
- case list_item:
- check(blocks_to_html(b->children, &contents, tight) == 0,
- "error converting blocks to html");
- brtrimws(contents);
- cr(html);
- bformata(html, "%s", contents->data);
- cr(html);
- bdestroy(contents);
- break;
- case list:
- // make sure a list starts at the beginning of the line:
- cr(html);
- data = &(b->attributes.list_data);
- check(blocks_to_html(b->children, &contents, data->tight) == 0,
- "error converting blocks to html");
- mbstart = bformat(" start=\"%d\"", data->start);
- bformata(html, "<%s%s>\n%s%s>",
- data->list_type == bullet ? "ul" : "ol",
- data->start == 1 ? "" : (char*) mbstart->data,
- contents->data,
- data->list_type == bullet ? "ul" : "ol");
- cr(html);
- bdestroy(contents);
- bdestroy(mbstart);
- break;
- case atx_header:
- case setext_header:
- check(inlines_to_html(b->inline_content, &contents) == 0,
- "error converting inlines to html");
- cr(html);
- bformata(html, "%s",
- b->attributes.header_level,
- contents->data,
- b->attributes.header_level);
- cr(html);
- bdestroy(contents);
- break;
- case indented_code:
- escaped = escape_html(b->string_content, false);
- cr(html);
- bformata(html, "%s
", escaped->data);
- cr(html);
- bdestroy(escaped);
- break;
- case fenced_code:
- escaped = escape_html(b->string_content, false);
- cr(html);
- bformata(html, "attributes.fenced_code_data.info) > 0) {
- escaped2 = escape_html(b->attributes.fenced_code_data.info, true);
- info_words = bsplit(escaped2, ' ');
- bformata(html, " class=\"language-%s\"", info_words->entry[0]->data);
- bdestroy(escaped2);
- bstrListDestroy(info_words);
- }
- bformata(html, ">%s
", escaped->data);
- cr(html);
- bdestroy(escaped);
- break;
- case html_block:
- bformata(html, "%s", b->string_content->data);
- break;
- case hrule:
- bformata(html, "
");
- cr(html);
- break;
- case reference_def:
- break;
- default:
- log_warn("block type %d not implemented\n", b->tag);
- break;
- }
- b = b->next;
- }
- *result = html;
- return 0;
- error:
- return -1;
-}
-
-// Convert an inline list to HTML. Returns 0 on success, and sets result.
-extern int inlines_to_html(inl* ils, bstring* result)
-{
- bstring contents = NULL;
- bstring html = blk2bstr("", 0);
- bstring mbtitle, escaped, escaped2;
-
- while(ils != NULL) {
- switch(ils->tag) {
- case str:
- escaped = escape_html(ils->content.literal, false);
- bformata(html, "%s", escaped->data);
- bdestroy(escaped);
- break;
- case linebreak:
- bformata(html, "
\n");
- break;
- case softbreak:
- bformata(html, "\n");
- break;
- case code:
- escaped = escape_html(ils->content.literal, false);
- bformata(html, "%s
", escaped->data);
- bdestroy(escaped);
- break;
- case raw_html:
- case entity:
- bformata(html, "%s", ils->content.literal->data);
- break;
- case link:
- check(inlines_to_html(ils->content.inlines, &contents) == 0,
- "error converting inlines to html");
- if (blength(ils->content.linkable.title) > 0) {
- escaped = escape_html(ils->content.linkable.title, true);
- mbtitle = bformat(" title=\"%s\"", escaped->data);
- bdestroy(escaped);
- } else {
- mbtitle = blk2bstr("",0);
- }
- escaped = escape_html(ils->content.linkable.url, true);
- bformata(html, "%s",
- escaped->data,
- mbtitle->data,
- contents->data);
- bdestroy(escaped);
- bdestroy(mbtitle);
- bdestroy(contents);
- break;
- case image:
- check(inlines_to_html(ils->content.inlines, &contents) == 0,
- "error converting inlines to html");
- escaped = escape_html(ils->content.linkable.url, true);
- escaped2 = escape_html(contents, false);
- bdestroy(contents);
- bformata(html, "
data, escaped2->data);
- bdestroy(escaped);
- bdestroy(escaped2);
- if (blength(ils->content.linkable.title) > 0) {
- escaped = escape_html(ils->content.linkable.title, true);
- bformata(html, " title=\"%s\"", escaped->data);
- bdestroy(escaped);
- }
- bformata(html, " />");
- break;
- case strong:
- check(inlines_to_html(ils->content.inlines, &contents) == 0,
- "error converting inlines to html");
- bformata(html, "%s", contents->data);
- bdestroy(contents);
- break;
- case emph:
- check(inlines_to_html(ils->content.inlines, &contents) == 0,
- "error converting inlines to html");
- bformata(html, "%s", contents->data);
- bdestroy(contents);
- break;
- }
- ils = ils->next;
- }
- *result = html;
- return 0;
- error:
- return -1;
-}
diff --git a/src/inlines.c b/src/inlines.c
index 4ff45ad..82c7219 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -6,9 +6,7 @@
#include "stmd.h"
#include "uthash.h"
-#include "debug.h"
#include "scanners.h"
-#include "utf8.h"
typedef struct Subject {
const gh_buf *buffer;
@@ -119,7 +117,7 @@ inline static inl* make_linkable(int t, inl* label, chunk url, chunk title)
e->tag = t;
e->content.linkable.label = label;
e->content.linkable.url = chunk_to_cstr(&url);
- e->content.linkable.title = chunk_to_cstr(&title);
+ e->content.linkable.title = url.len ? chunk_to_cstr(&title) : NULL;
e->next = NULL;
return e;
}
diff --git a/src/main.c b/src/main.c
index 9e0a3c8..e1abedc 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,99 +1,77 @@
#include
#include
-#include "bstrlib.h"
+#include
#include "stmd.h"
#include "debug.h"
void print_usage()
{
- printf("Usage: stmd [FILE*]\n");
- printf("Options: --help, -h Print usage information\n");
- printf(" --ast Print AST instead of HTML\n");
- printf(" --version Print version\n");
+ printf("Usage: stmd [FILE*]\n");
+ printf("Options: --help, -h Print usage information\n");
+ printf(" --ast Print AST instead of HTML\n");
+ printf(" --version Print version\n");
}
-int main(int argc, char *argv[]) {
- int i;
- bool ast = false;
- int g = 0;
- int numfps = 0;
- int files[argc];
+static void print_document(block *document, bool ast)
+{
+ gh_buf html = GH_BUF_INIT;
+
+ if (ast) {
+ print_blocks(document, 0);
+ } else {
+ blocks_to_html(&html, document, false);
+ printf("%s", html.ptr);
+ gh_buf_free(&html);
+ }
+}
- for (i=1; i < argc; i++) {
- if (strcmp(argv[i], "--version") == 0) {
- printf("stmd %s", VERSION);
- printf(" - CommonMark converter (c) 2014 John MacFarlane\n");
- exit(0);
- } else if ((strcmp(argv[i], "--help") == 0) ||
- (strcmp(argv[i], "-h") == 0)) {
- print_usage();
- exit(0);
- } else if (strcmp(argv[i], "--ast") == 0) {
- ast = true;
- } else if (*argv[i] == '-') {
- print_usage();
- exit(1);
- } else { // treat as file argument
- files[g] = i;
- g++;
- }
- }
+int main(int argc, char *argv[])
+{
+ int i, numfps = 0;
+ bool ast = false;
+ int files[argc];
+ block *document = NULL;
- numfps = g;
- bstring s = NULL;
- bstring html;
- g = 0;
- block * cur = make_document();
- int linenum = 1;
- extern int errno;
- FILE * fp = NULL;
+ for (i = 1; i < argc; i++) {
+ if (strcmp(argv[i], "--version") == 0) {
+ printf("stmd %s", VERSION);
+ printf(" - CommonMark converter (c) 2014 John MacFarlane\n");
+ exit(0);
+ } else if ((strcmp(argv[i], "--help") == 0) ||
+ (strcmp(argv[i], "-h") == 0)) {
+ print_usage();
+ exit(0);
+ } else if (strcmp(argv[i], "--ast") == 0) {
+ ast = true;
+ } else if (*argv[i] == '-') {
+ print_usage();
+ exit(1);
+ } else { // treat as file argument
+ files[numfps++] = i;
+ }
+ }
- if (numfps == 0) {
- // read from stdin
- while ((s = bgets((bNgetc) fgetc, stdin, '\n'))) {
- check(incorporate_line(s, linenum, &cur) == 0,
- "error incorporating line %d", linenum);
- bdestroy(s);
- linenum++;
- }
- } else {
- // iterate over input file pointers
- for (g=0; g < numfps; g++) {
+ if (numfps == 0) {
+ document = stmd_parse_file(stdin);
+ print_document(document, ast);
+ free_blocks(document);
+ } else {
+ for (i = 0; i < numfps; i++) {
+ FILE *fp = fopen(argv[files[i]], "r");
- fp = fopen(argv[files[g]], "r");
- if (fp == NULL) {
- fprintf(stderr, "Error opening file %s: %s\n",
- argv[files[g]], strerror(errno));
- exit(1);
- }
+ if (fp == NULL) {
+ fprintf(stderr, "Error opening file %s: %s\n",
+ argv[files[i]], strerror(errno));
+ exit(1);
+ }
- while ((s = bgets((bNgetc) fgetc, fp, '\n'))) {
- check(incorporate_line(s, linenum, &cur) == 0,
- "error incorporating line %d", linenum);
- bdestroy(s);
- linenum++;
- }
- fclose(fp);
- }
- }
+ document = stmd_parse_file(fp);
+ print_document(document, ast);
+ free_blocks(document);
+ fclose(fp);
+ }
+ }
- while (cur != cur->top) {
- finalize(cur, linenum);
- cur = cur->parent;
- }
- check(cur == cur->top, "problems finalizing open containers");
- finalize(cur, linenum);
- process_inlines(cur, cur->attributes.refmap);
- if (ast) {
- print_blocks(cur, 0);
- } else {
- check(blocks_to_html(cur, &html, false) == 0, "could not format as HTML");
- // printf("%s", html->data);
- bdestroy(html);
- }
- free_blocks(cur);
- return 0;
-error:
- return -1;
+ return 0;
}
diff --git a/src/print.c b/src/print.c
index a924870..3ebde16 100644
--- a/src/print.c
+++ b/src/print.c
@@ -1,168 +1,175 @@
#include
#include
-#include "bstrlib.h"
+#include
#include "stmd.h"
#include "debug.h"
-static bstring format_str(bstring s)
+static void print_str(const unsigned char *s, int len)
{
- int pos = 0;
- int len = blength(s);
- bstring result = bfromcstr("");
- char c;
- bformata(result, "\"");
- while (pos < len) {
- c = bchar(s, pos);
- switch (c) {
- case '\n':
- bformata(result, "\\n");
- break;
- case '"':
- bformata(result, "\\\"");
- break;
- case '\\':
- bformata(result, "\\\\");
- break;
- default:
- bformata(result, "%c", c);
- }
- pos++;
- }
- bformata(result, "\"");
- return result;
+ int i;
+
+ if (len < 0)
+ len = strlen(s);
+
+ putchar('"');
+ for (i = 0; i < len; ++i) {
+ unsigned char c = s[i];
+
+ switch (c) {
+ case '\n':
+ printf("\\n");
+ break;
+ case '"':
+ printf("\\\"");
+ break;
+ case '\\':
+ printf("\\\\");
+ break;
+ default:
+ putchar((int)c);
+ }
+ }
+ putchar('"');
}
// Functions to pretty-print inline and block lists, for debugging.
// Prettyprint an inline list, for debugging.
extern void print_blocks(block* b, int indent)
{
- struct ListData * data;
- while(b != NULL) {
- // printf("%3d %3d %3d| ", b->start_line, b->start_column, b->end_line);
- for (int i=0; i < indent; i++) {
- putchar(' ');
- }
- switch(b->tag) {
- case document:
- printf("document\n");
- print_blocks(b->children, indent + 2);
- break;
- case block_quote:
- printf("block_quote\n");
- print_blocks(b->children, indent + 2);
- break;
- case list_item:
- data = &(b->attributes.list_data);
- printf("list_item\n");
- print_blocks(b->children, indent + 2);
- break;
- case list:
- data = &(b->attributes.list_data);
- if (data->list_type == ordered) {
- printf("list (type=ordered tight=%s start=%d delim=%s)\n",
- (data->tight ? "true" : "false"),
- data->start,
- (data->delimiter == parens ? "parens" : "period"));
- } else {
- printf("list (type=bullet tight=%s bullet_char=%c)\n",
- (data->tight ? "true" : "false"),
- data->bullet_char);
- }
- print_blocks(b->children, indent + 2);
- break;
- case atx_header:
- printf("atx_header (level=%d)\n", b->attributes.header_level);
- print_inlines(b->inline_content, indent + 2);
- break;
- case setext_header:
- printf("setext_header (level=%d)\n", b->attributes.header_level);
- print_inlines(b->inline_content, indent + 2);
- break;
- case paragraph:
- printf("paragraph\n");
- print_inlines(b->inline_content, indent + 2);
- break;
- case hrule:
- printf("hrule\n");
- break;
- case indented_code:
- printf("indented_code %s\n", format_str(b->string_content)->data);
- break;
- case fenced_code:
- printf("fenced_code length=%d info=%s %s\n",
- b->attributes.fenced_code_data.fence_length,
- format_str(b->attributes.fenced_code_data.info)->data,
- format_str(b->string_content)->data);
- break;
- case html_block:
- printf("html_block %s\n", format_str(b->string_content)->data);
- break;
- case reference_def:
- printf("reference_def\n");
- break;
- default:
- log_warn("block type %d not implemented\n", b->tag);
- break;
- }
- b = b->next;
- }
+ struct ListData *data;
+
+ while(b != NULL) {
+ // printf("%3d %3d %3d| ", b->start_line, b->start_column, b->end_line);
+ for (int i=0; i < indent; i++) {
+ putchar(' ');
+ }
+
+ switch(b->tag) {
+ case document:
+ printf("document\n");
+ print_blocks(b->children, indent + 2);
+ break;
+ case block_quote:
+ printf("block_quote\n");
+ print_blocks(b->children, indent + 2);
+ break;
+ case list_item:
+ data = &(b->attributes.list_data);
+ printf("list_item\n");
+ print_blocks(b->children, indent + 2);
+ break;
+ case list:
+ data = &(b->attributes.list_data);
+ if (data->list_type == ordered) {
+ printf("list (type=ordered tight=%s start=%d delim=%s)\n",
+ (data->tight ? "true" : "false"),
+ data->start,
+ (data->delimiter == parens ? "parens" : "period"));
+ } else {
+ printf("list (type=bullet tight=%s bullet_char=%c)\n",
+ (data->tight ? "true" : "false"),
+ data->bullet_char);
+ }
+ print_blocks(b->children, indent + 2);
+ break;
+ case atx_header:
+ printf("atx_header (level=%d)\n", b->attributes.header_level);
+ print_inlines(b->inline_content, indent + 2);
+ break;
+ case setext_header:
+ printf("setext_header (level=%d)\n", b->attributes.header_level);
+ print_inlines(b->inline_content, indent + 2);
+ break;
+ case paragraph:
+ printf("paragraph\n");
+ print_inlines(b->inline_content, indent + 2);
+ break;
+ case hrule:
+ printf("hrule\n");
+ break;
+ case indented_code:
+ printf("indented_code ");
+ print_str(b->string_content.ptr, -1);
+ putchar('\n');
+ break;
+ case fenced_code:
+ printf("fenced_code length=%d info=",
+ b->attributes.fenced_code_data.fence_length);
+ print_str(b->attributes.fenced_code_data.info.ptr, -1);
+ putchar(' ');
+ print_str(b->string_content.ptr, -1);
+ putchar('\n');
+ break;
+ case html_block:
+ printf("html_block ");
+ print_str(b->string_content.ptr, -1);
+ putchar('\n');
+ break;
+ case reference_def:
+ printf("reference_def\n");
+ break;
+ default:
+ printf("# NOT IMPLEMENTED (%d)\n", b->tag);
+ break;
+ }
+ b = b->next;
+ }
}
// Prettyprint an inline list, for debugging.
extern void print_inlines(inl* ils, int indent)
{
- while(ils != NULL) {
- /*
- // we add 11 extra spaces for the line/column info
- for (int i=0; i < 11; i++) {
- putchar(' ');
- }
- putchar('|');
- putchar(' ');
- */
- for (int i=0; i < indent; i++) {
- putchar(' ');
- }
- switch(ils->tag) {
- case str:
- printf("str %s\n", format_str(ils->content.literal)->data);
- break;
- case linebreak:
- printf("linebreak\n");
- break;
- case softbreak:
- printf("softbreak\n");
- break;
- case code:
- printf("code %s\n", format_str(ils->content.literal)->data);
- break;
- case raw_html:
- printf("html %s\n", format_str(ils->content.literal)->data);
- break;
- case entity:
- printf("entity %s\n", format_str(ils->content.literal)->data);
- break;
- case link:
- printf("link url=%s title=%s\n",
- format_str(ils->content.linkable.url)->data,
- format_str(ils->content.linkable.title)->data);
- print_inlines(ils->content.linkable.label, indent + 2);
- break;
- case image:
- printf("image url=%s title=%s\n",
- format_str(ils->content.linkable.url)->data,
- format_str(ils->content.linkable.title)->data);
- print_inlines(ils->content.linkable.label, indent + 2);
- break;
- case strong:
- printf("strong\n");
- print_inlines(ils->content.linkable.label, indent + 2);
- break;
- case emph:
- printf("emph\n");
- print_inlines(ils->content.linkable.label, indent + 2);
- break;
- }
- ils = ils->next;
- }
+ while(ils != NULL) {
+ for (int i=0; i < indent; i++) {
+ putchar(' ');
+ }
+ switch(ils->tag) {
+ case str:
+ printf("str ");
+ print_str(ils->content.literal.data, ils->content.literal.len);
+ putchar('\n');
+ break;
+ case linebreak:
+ printf("linebreak\n");
+ break;
+ case softbreak:
+ printf("softbreak\n");
+ break;
+ case code:
+ printf("code ");
+ print_str(ils->content.literal.data, ils->content.literal.len);
+ putchar('\n');
+ break;
+ case raw_html:
+ printf("html ");
+ print_str(ils->content.literal.data, ils->content.literal.len);
+ putchar('\n');
+ break;
+ case entity:
+ printf("entity ");
+ print_str(ils->content.literal.data, ils->content.literal.len);
+ putchar('\n');
+ break;
+ case link:
+ case image:
+ printf("%s url=", ils->tag == link ? "link" : "image");
+ print_str(ils->content.linkable.url, -1);
+ if (ils->content.linkable.title) {
+ printf(" title=");
+ print_str(ils->content.linkable.title, -1);
+ }
+ putchar('\n');
+ print_inlines(ils->content.linkable.label, indent + 2);
+ break;
+ case strong:
+ printf("strong\n");
+ print_inlines(ils->content.linkable.label, indent + 2);
+ break;
+ case emph:
+ printf("emph\n");
+ print_inlines(ils->content.linkable.label, indent + 2);
+ break;
+ }
+ ils = ils->next;
+ }
}
diff --git a/src/stmd.h b/src/stmd.h
index eb1b989..dc24235 100644
--- a/src/stmd.h
+++ b/src/stmd.h
@@ -105,19 +105,14 @@ extern block* add_child(block* parent,
int block_type, int start_line, int start_column);
void free_blocks(block* e);
-block *stmd_parse_document(const char *buffer, size_t len);
-
-// FOR NOW:
-void process_inlines(block* cur, reference** refmap);
-void incorporate_line(gh_buf *ln, int line_number, block** curptr);
-void finalize(block* b, int line_number);
+extern block *stmd_parse_document(const unsigned char *buffer, size_t len);
+extern block *stmd_parse_file(FILE *f);
void print_inlines(inl* ils, int indent);
void print_blocks(block* blk, int indent);
-/* TODO */
-// int blocks_to_html(block* b, bstring* result, bool tight);
-// int inlines_to_html(inl* b, bstring* result);
+void blocks_to_html(gh_buf *html, block *b, bool tight);
+void inlines_to_html(gh_buf *html, inl *b);
void utf8proc_case_fold(gh_buf *dest, const unsigned char *str, int len);
diff --git a/src/utf8.c b/src/utf8.c
index 1a5df9e..e3f8dd3 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -1,6 +1,8 @@
#include
-#include "bstrlib.h"
-#include "debug.h"
+#include
+#include
+
+#include "stmd.h"
static const int8_t utf8proc_utf8class[256] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
diff --git a/src/utf8.h b/src/utf8.h
deleted file mode 100644
index fe59a90..0000000
--- a/src/utf8.h
+++ /dev/null
@@ -1,6 +0,0 @@
-#include
-#include "bstrlib.h"
-
-extern unsigned char * from_utf8(unsigned char * s, unsigned int *n);
-extern int to_utf8(unsigned int c, bstring dest);
-extern bstring case_fold(bstring source);
--
cgit v1.2.3
From a7314deae649646f1f7ce5ede972641b5b62538c Mon Sep 17 00:00:00 2001
From: Vicent Marti
Date: Wed, 3 Sep 2014 03:40:23 +0200
Subject: 338/103
---
Makefile | 4 +-
src/blocks.c | 173 +++++++++++++++++-----------------
src/buffer.c | 26 ++++-
src/buffer.h | 2 +-
src/html/houdini_href_e.c | 10 +-
src/html/houdini_html_e.c | 10 +-
src/html/html.c | 4 +-
src/inlines.c | 235 ++++++++++++++++++----------------------------
src/print.c | 2 +-
src/scanners.h | 28 +++---
src/scanners.re | 85 +++++++----------
src/stmd.h | 16 ++--
12 files changed, 261 insertions(+), 334 deletions(-)
(limited to 'src/buffer.h')
diff --git a/Makefile b/Makefile
index d14a928..89ec68c 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
-CFLAGS=-ggdb3 -O0 -Wall -Wextra -std=c99 -Isrc $(OPTFLAGS)
-LDFLAGS=-ggdb3 -O0 -Wall -Werror
+CFLAGS=-ggdb3 -O0 -Wall -Wextra -Wno-unused-variable -std=c99 -Isrc $(OPTFLAGS)
+LDFLAGS=-ggdb3 -O0 -Wall -Wno-unused-variable # -Werror
SRCDIR=src
DATADIR=data
diff --git a/src/blocks.c b/src/blocks.c
index 42f20db..94ff986 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -8,6 +8,8 @@
#include "scanners.h"
#include "uthash.h"
+#define peek_at(i, n) (i)->data[n]
+
static void incorporate_line(gh_buf *ln, int line_number, block** curptr);
static void finalize(block* b, int line_number);
@@ -27,7 +29,6 @@ static block* make_block(int tag, int start_line, int start_column)
e->top = NULL;
e->attributes.refmap = NULL;
gh_buf_init(&e->string_content, 32);
- e->string_pos = 0;
e->inline_content = NULL;
e->next = NULL;
e->prev = NULL;
@@ -80,10 +81,10 @@ static inline bool accepts_lines(int block_type)
block_type == fenced_code);
}
-static void add_line(block* block, gh_buf *ln, int offset)
+static void add_line(block* block, chunk *ch, int offset)
{
assert(block->open);
- gh_buf_put(&block->string_content, ln->ptr + offset, ln->size - offset);
+ gh_buf_put(&block->string_content, ch->data + offset, ch->len - offset);
}
static void remove_trailing_blank_lines(gh_buf *ln)
@@ -104,7 +105,7 @@ static void remove_trailing_blank_lines(gh_buf *ln)
i = gh_buf_strchr(ln, '\n', i);
if (i >= 0)
- gh_buf_truncate(ln, i + 1);
+ gh_buf_truncate(ln, i);
}
// Check to see if a block ends with a blank line, descending
@@ -162,12 +163,12 @@ static void finalize(block* b, int line_number)
switch (b->tag) {
case paragraph:
pos = 0;
- while (gh_buf_at(&b->string_content, b->string_pos) == '[' &&
- (pos = parse_reference(&b->string_content, b->string_pos,
- b->top->attributes.refmap))) {
- b->string_pos = pos;
+ while (gh_buf_at(&b->string_content, 0) == '[' &&
+ (pos = parse_reference(&b->string_content, b->top->attributes.refmap))) {
+
+ gh_buf_drop(&b->string_content, pos);
}
- if (is_blank(&b->string_content, b->string_pos)) {
+ if (is_blank(&b->string_content, 0)) {
b->tag = reference_def;
}
break;
@@ -179,14 +180,16 @@ static void finalize(block* b, int line_number)
case fenced_code:
// first line of contents becomes info
- firstlinelen = gh_buf_strchr(&b->string_content, '\n', b->string_pos);
+ firstlinelen = gh_buf_strchr(&b->string_content, '\n', 0);
+
+ gh_buf_init(&b->attributes.fenced_code_data.info, 0);
gh_buf_set(
&b->attributes.fenced_code_data.info,
- b->string_content.ptr + b->string_pos,
+ b->string_content.ptr,
firstlinelen
);
- b->string_pos = firstlinelen + 1;
+ gh_buf_drop(&b->string_content, firstlinelen + 1);
gh_buf_trim(&b->attributes.fenced_code_data.info);
unescape_buffer(&b->attributes.fenced_code_data.info);
@@ -281,7 +284,7 @@ void process_inlines(block* cur, reference** refmap)
case paragraph:
case atx_header:
case setext_header:
- cur->inline_content = parse_inlines(&cur->string_content, cur->string_pos, refmap);
+ cur->inline_content = parse_inlines(&cur->string_content, refmap);
// MEM
// gh_buf_free(&cur->string_content);
break;
@@ -300,19 +303,18 @@ void process_inlines(block* cur, reference** refmap)
// Attempts to parse a list item marker (bullet or enumerated).
// On success, returns length of the marker, and populates
// data with the details. On failure, returns 0.
-static int parse_list_marker(gh_buf *ln, int pos,
- struct ListData ** dataptr)
+static int parse_list_marker(chunk *input, int pos, struct ListData ** dataptr)
{
- char c;
+ unsigned char c;
int startpos;
struct ListData * data;
startpos = pos;
- c = gh_buf_at(ln, pos);
+ c = peek_at(input, pos);
- if ((c == '*' || c == '-' || c == '+') && !scan_hrule(ln, pos)) {
+ if ((c == '*' || c == '-' || c == '+') && !scan_hrule(input, pos)) {
pos++;
- if (!isspace(gh_buf_at(ln, pos))) {
+ if (!isspace(peek_at(input, pos))) {
return 0;
}
data = malloc(sizeof(struct ListData));
@@ -327,14 +329,14 @@ static int parse_list_marker(gh_buf *ln, int pos,
int start = 0;
do {
- start = (10 * start) + (gh_buf_at(ln, pos) - '0');
+ start = (10 * start) + (peek_at(input, pos) - '0');
pos++;
- } while (isdigit(gh_buf_at(ln, pos)));
+ } while (isdigit(peek_at(input, pos)));
- c = gh_buf_at(ln, pos);
+ c = peek_at(input, pos);
if (c == '.' || c == ')') {
pos++;
- if (!isspace(gh_buf_at(ln, pos))) {
+ if (!isspace(peek_at(input, pos))) {
return 0;
}
data = malloc(sizeof(struct ListData));
@@ -449,8 +451,26 @@ extern block *stmd_parse_document(const unsigned char *buffer, size_t len)
return finalize_document(document, linenum);
}
+static void chop_trailing_hashtags(chunk *ch)
+{
+ int n;
+
+ chunk_rtrim(ch);
+ n = ch->len - 1;
+
+ // if string ends in #s, remove these:
+ while (n >= 0 && peek_at(ch, n) == '#')
+ n--;
+
+ // the last # was escaped, so we include it.
+ if (n >= 0 && peek_at(ch, n) == '\\')
+ n++;
+
+ ch->len = n + 1;
+}
+
// Process one line at a time, modifying a block.
-static void incorporate_line(gh_buf *ln, int line_number, block** curptr)
+static void incorporate_line(gh_buf *line, int line_number, block** curptr)
{
block* last_matched_container;
int offset = 0;
@@ -464,6 +484,10 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr)
bool blank = false;
int first_nonspace;
int indent;
+ chunk input;
+
+ input.data = line->ptr;
+ input.len = line->size;
// container starts at the document root.
container = cur->top;
@@ -475,21 +499,19 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr)
container = container->last_child;
first_nonspace = offset;
- while (gh_buf_at(ln, first_nonspace) == ' ') {
+ while (peek_at(&input, first_nonspace) == ' ') {
first_nonspace++;
}
indent = first_nonspace - offset;
- blank = gh_buf_at(ln, first_nonspace) == '\n';
+ blank = peek_at(&input, first_nonspace) == '\n';
if (container->tag == block_quote) {
-
- matched = indent <= 3 && gh_buf_at(ln, first_nonspace) == '>';
+ matched = indent <= 3 && peek_at(&input, first_nonspace) == '>';
if (matched) {
offset = first_nonspace + 1;
- if (gh_buf_at(ln, offset) == ' ') {
+ if (peek_at(&input, offset) == ' ')
offset++;
- }
} else {
all_matched = false;
}
@@ -526,7 +548,7 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr)
// skip optional spaces of fence offset
i = container->attributes.fenced_code_data.fence_offset;
- while (i > 0 && gh_buf_at(ln, offset) == ' ') {
+ while (i > 0 && peek_at(&input, offset) == ' ') {
offset++;
i--;
}
@@ -564,15 +586,13 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr)
container->tag != html_block) {
first_nonspace = offset;
- while (gh_buf_at(ln, first_nonspace) == ' ') {
+ while (peek_at(&input, first_nonspace) == ' ')
first_nonspace++;
- }
indent = first_nonspace - offset;
- blank = gh_buf_at(ln, first_nonspace) == '\n';
+ blank = peek_at(&input, first_nonspace) == '\n';
if (indent >= CODE_INDENT) {
-
if (cur->tag != paragraph && !blank) {
offset += CODE_INDENT;
container = add_child(container, indented_code, line_number, offset + 1);
@@ -580,76 +600,70 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr)
break;
}
- } else if (gh_buf_at(ln, first_nonspace) == '>') {
+ } else if (peek_at(&input, first_nonspace) == '>') {
offset = first_nonspace + 1;
// optional following character
- if (gh_buf_at(ln, offset) == ' ') {
+ if (peek_at(&input, offset) == ' ')
offset++;
- }
container = add_child(container, block_quote, line_number, offset + 1);
- } else if ((matched = scan_atx_header_start(ln, first_nonspace))) {
+ } else if ((matched = scan_atx_header_start(&input, first_nonspace))) {
offset = first_nonspace + matched;
container = add_child(container, atx_header, line_number, offset + 1);
- int hashpos = gh_buf_strchr(ln, '#', first_nonspace);
- assert(hashpos >= 0);
-
+ int hashpos = chunk_strchr(&input, '#', first_nonspace);
int level = 0;
- while (gh_buf_at(ln, hashpos) == '#') {
+
+ while (peek_at(&input, hashpos) == '#') {
level++;
hashpos++;
}
container->attributes.header_level = level;
- } else if ((matched = scan_open_code_fence(ln, first_nonspace))) {
+ } else if ((matched = scan_open_code_fence(&input, first_nonspace))) {
- container = add_child(container, fenced_code, line_number,
- first_nonspace + 1);
- container->attributes.fenced_code_data.fence_char = gh_buf_at(ln,
- first_nonspace);
+ container = add_child(container, fenced_code, line_number, first_nonspace + 1);
+ container->attributes.fenced_code_data.fence_char = peek_at(&input, first_nonspace);
container->attributes.fenced_code_data.fence_length = matched;
- container->attributes.fenced_code_data.fence_offset =
- first_nonspace - offset;
+ container->attributes.fenced_code_data.fence_offset = first_nonspace - offset;
offset = first_nonspace + matched;
- } else if ((matched = scan_html_block_tag(ln, first_nonspace))) {
+ } else if ((matched = scan_html_block_tag(&input, first_nonspace))) {
- container = add_child(container, html_block, line_number,
- first_nonspace + 1);
+ container = add_child(container, html_block, line_number, first_nonspace + 1);
// note, we don't adjust offset because the tag is part of the text
} else if (container->tag == paragraph &&
- (lev = scan_setext_header_line(ln, first_nonspace)) &&
+ (lev = scan_setext_header_line(&input, first_nonspace)) &&
// check that there is only one line in the paragraph:
gh_buf_strrchr(&container->string_content, '\n',
gh_buf_len(&container->string_content) - 2) < 0) {
container->tag = setext_header;
container->attributes.header_level = lev;
- offset = gh_buf_len(ln) - 1;
+ offset = input.len - 1;
} else if (!(container->tag == paragraph && !all_matched) &&
- (matched = scan_hrule(ln, first_nonspace))) {
+ (matched = scan_hrule(&input, first_nonspace))) {
// it's only now that we know the line is not part of a setext header:
container = add_child(container, hrule, line_number, first_nonspace + 1);
finalize(container, line_number);
container = container->parent;
- offset = gh_buf_len(ln) - 1;
+ offset = input.len - 1;
- } else if ((matched = parse_list_marker(ln, first_nonspace, &data))) {
+ } else if ((matched = parse_list_marker(&input, first_nonspace, &data))) {
// compute padding:
offset = first_nonspace + matched;
i = 0;
- while (i <= 5 && gh_buf_at(ln, offset + i) == ' ') {
+ while (i <= 5 && peek_at(&input, offset + i) == ' ') {
i++;
}
// i = number of spaces after marker, up to 5
- if (i >= 5 || i < 1 || gh_buf_at(ln, offset) == '\n') {
+ if (i >= 5 || i < 1 || peek_at(&input, offset) == '\n') {
data->padding = matched + 1;
if (i > 0) {
offset += 1;
@@ -674,6 +688,7 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr)
// add the list item
container = add_child(container, list_item, line_number,
first_nonspace + 1);
+ /* TODO: static */
container->attributes.list_data = *data;
free(data);
@@ -691,12 +706,11 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr)
// appropriate container.
first_nonspace = offset;
- while (gh_buf_at(ln, first_nonspace) == ' ') {
+ while (peek_at(&input, first_nonspace) == ' ')
first_nonspace++;
- }
indent = first_nonspace - offset;
- blank = gh_buf_at(ln, first_nonspace) == '\n';
+ blank = peek_at(&input, first_nonspace) == '\n';
// block quote lines are never blank as they start with >
// and we don't count blanks in fenced code for purposes of tight/loose
@@ -721,13 +735,12 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr)
cur->tag == paragraph &&
gh_buf_len(&cur->string_content) > 0) {
- add_line(cur, ln, offset);
+ add_line(cur, &input, offset);
} else { // not a lazy continuation
// finalize any blocks that were not matched and set cur to container:
while (cur != last_matched_container) {
-
finalize(cur, line_number);
cur = cur->parent;
assert(cur != NULL);
@@ -735,58 +748,46 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr)
if (container->tag == indented_code) {
- add_line(container, ln, offset);
+ add_line(container, &input, offset);
} else if (container->tag == fenced_code) {
matched = (indent <= 3
- && gh_buf_at(ln, first_nonspace) == container->attributes.fenced_code_data.fence_char)
- && scan_close_code_fence(ln, first_nonspace,
+ && peek_at(&input, first_nonspace) == container->attributes.fenced_code_data.fence_char)
+ && scan_close_code_fence(&input, first_nonspace,
container->attributes.fenced_code_data.fence_length);
if (matched) {
// if closing fence, don't add line to container; instead, close it:
finalize(container, line_number);
container = container->parent; // back up to parent
} else {
- add_line(container, ln, offset);
+ add_line(container, &input, offset);
}
} else if (container->tag == html_block) {
- add_line(container, ln, offset);
+ add_line(container, &input, offset);
} else if (blank) {
// ??? do nothing
} else if (container->tag == atx_header) {
- // chop off trailing ###s...use a scanner?
- gh_buf_trim(ln);
- int p = gh_buf_len(ln) - 1;
-
- // if string ends in #s, remove these:
- while (gh_buf_at(ln, p) == '#') {
- p--;
- }
- if (gh_buf_at(ln, p) == '\\') {
- // the last # was escaped, so we include it.
- p++;
- }
- gh_buf_truncate(ln, p + 1);
- add_line(container, ln, first_nonspace);
+ chop_trailing_hashtags(&input);
+ add_line(container, &input, first_nonspace);
finalize(container, line_number);
container = container->parent;
} else if (accepts_lines(container->tag)) {
- add_line(container, ln, first_nonspace);
+ add_line(container, &input, first_nonspace);
} else if (container->tag != hrule && container->tag != setext_header) {
// create paragraph container for line
container = add_child(container, paragraph, line_number, first_nonspace + 1);
- add_line(container, ln, first_nonspace);
+ add_line(container, &input, first_nonspace);
} else {
assert(false);
diff --git a/src/buffer.c b/src/buffer.c
index cfc6a7e..dc4a405 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -95,7 +95,7 @@ void gh_buf_clear(gh_buf *buf)
int gh_buf_set(gh_buf *buf, const unsigned char *data, int len)
{
- if (len == 0 || data == NULL) {
+ if (len <= 0 || data == NULL) {
gh_buf_clear(buf);
} else {
if (data != buf->ptr) {
@@ -125,6 +125,9 @@ int gh_buf_putc(gh_buf *buf, int c)
int gh_buf_put(gh_buf *buf, const unsigned char *data, int len)
{
+ if (len <= 0)
+ return 0;
+
ENSURE_SIZE(buf, buf->size + len + 1);
memmove(buf->ptr + buf->size, data, len);
buf->size += len;
@@ -272,15 +275,28 @@ void gh_buf_truncate(gh_buf *buf, int len)
}
}
+void gh_buf_drop(gh_buf *buf, int n)
+{
+ if (n > 0) {
+ buf->size = buf->size - n;
+ if (buf->size)
+ memmove(buf->ptr, buf->ptr + n, buf->size);
+
+ buf->ptr[buf->size] = '\0';
+ }
+}
+
void gh_buf_trim(gh_buf *buf)
{
- /* TODO: leading whitespace? */
- /*
+ int i = 0;
+
+ if (!buf->size)
+ return;
+
while (i < buf->size && isspace(buf->ptr[i]))
i++;
- gh_buf_truncate(buf, i);
- */
+ gh_buf_drop(buf, i);
/* rtrim */
while (buf->size > 0) {
diff --git a/src/buffer.h b/src/buffer.h
index 422ef02..0d5143e 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -105,8 +105,8 @@ extern void gh_buf_clear(gh_buf *buf);
int gh_buf_strchr(const gh_buf *buf, int c, int pos);
int gh_buf_strrchr(const gh_buf *buf, int c, int pos);
+void gh_buf_drop(gh_buf *buf, int n);
void gh_buf_truncate(gh_buf *buf, int len);
-void gh_buf_ltruncate(gh_buf *buf, int len);
void gh_buf_trim(gh_buf *buf);
#endif
diff --git a/src/html/houdini_href_e.c b/src/html/houdini_href_e.c
index 59fe850..b2a7d79 100644
--- a/src/html/houdini_href_e.c
+++ b/src/html/houdini_href_e.c
@@ -62,16 +62,8 @@ houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size)
while (i < size && HREF_SAFE[src[i]] != 0)
i++;
- if (likely(i > org)) {
- if (unlikely(org == 0)) {
- if (i >= size)
- return 0;
-
- gh_buf_grow(ob, HOUDINI_ESCAPED_SIZE(size));
- }
-
+ if (likely(i > org))
gh_buf_put(ob, src + org, i - org);
- }
/* escaping */
if (i >= size)
diff --git a/src/html/houdini_html_e.c b/src/html/houdini_html_e.c
index 316c5ce..5cdd3dd 100644
--- a/src/html/houdini_html_e.c
+++ b/src/html/houdini_html_e.c
@@ -54,16 +54,8 @@ houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure)
while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0)
i++;
- if (i > org) {
- if (unlikely(org == 0)) {
- if (i >= size)
- return 0;
-
- gh_buf_grow(ob, HOUDINI_ESCAPED_SIZE(size));
- }
-
+ if (i > org)
gh_buf_put(ob, src + org, i - org);
- }
/* escaping */
if (unlikely(i >= size))
diff --git a/src/html/html.c b/src/html/html.c
index 2f160ca..27ffe58 100644
--- a/src/html/html.c
+++ b/src/html/html.c
@@ -68,7 +68,7 @@ void blocks_to_html(gh_buf *html, block *b, bool tight)
cr(html);
gh_buf_puts(html, "");
blocks_to_html(html, b->children, tight);
- gh_buf_trim(html);
+ gh_buf_trim(html); /* TODO: rtrim */
gh_buf_puts(html, "");
cr(html);
break;
@@ -106,7 +106,7 @@ void blocks_to_html(gh_buf *html, block *b, bool tight)
cr(html);
gh_buf_puts(html, "");
escape_html(html, b->string_content.ptr, b->string_content.size);
- gh_buf_puts(html, "
");
+ gh_buf_puts(html, "");
cr(html);
break;
diff --git a/src/inlines.c b/src/inlines.c
index 7b48ad9..ef27a24 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -9,10 +9,10 @@
#include "scanners.h"
typedef struct Subject {
- const gh_buf *buffer;
- int pos;
- reference** reference_map;
- int label_nestlevel;
+ chunk input;
+ int pos;
+ int label_nestlevel;
+ reference** reference_map;
} subject;
reference* lookup_reference(reference** refmap, chunk *label);
@@ -27,12 +27,16 @@ inline static void chunk_trim(chunk *c);
inline static chunk chunk_literal(const char *data);
inline static chunk chunk_buf_detach(gh_buf *buf);
-inline static chunk chunk_buf(const gh_buf *buf, int pos, int len);
+inline static chunk chunk_dup(const chunk *ch, int pos, int len);
static inl *parse_chunk_inlines(chunk *chunk, reference** refmap);
static inl *parse_inlines_while(subject* subj, int (*f)(subject*));
static int parse_inline(subject* subj, inl ** last);
+static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap);
+static void subject_from_buf(subject *e, gh_buf *buffer, reference** refmap);
+static int subject_find_special_char(subject *subj);
+
extern void free_reference(reference *ref) {
free(ref->label);
free(ref->url);
@@ -101,10 +105,12 @@ extern reference* make_reference(chunk *label, chunk *url, chunk *title)
extern void add_reference(reference** refmap, reference* ref)
{
reference * t = NULL;
- HASH_FIND(hh, *refmap, (char*)ref->label, (unsigned)strlen(ref->label), t);
+ const char *label = (const char *)ref->label;
+
+ HASH_FIND(hh, *refmap, label, strlen(label), t);
if (t == NULL) {
- HASH_ADD_KEYPTR(hh, *refmap, (char*)ref->label, (unsigned)strlen(ref->label), ref);
+ HASH_ADD_KEYPTR(hh, *refmap, label, strlen(label), ref);
} else {
free_reference(ref); // we free this now since it won't be in the refmap
}
@@ -210,87 +216,49 @@ inline static inl* append_inlines(inl* a, inl* b)
return a;
}
-// Make a 'subject' from an input string.
-static void init_subject(subject *e, gh_buf *buffer, int input_pos, reference** refmap)
+static void subject_from_buf(subject *e, gh_buf *buffer, reference** refmap)
{
- e->buffer = buffer;
- e->pos = input_pos;
+ e->input.data = buffer->ptr;
+ e->input.len = buffer->size;
+ e->input.alloc = 0;
+ e->pos = 0;
e->label_nestlevel = 0;
e->reference_map = refmap;
-}
-
-inline static int isbacktick(int c)
-{
- return (c == '`');
-}
-
-inline static void chunk_free(chunk *c)
-{
- if (c->alloc)
- free((char *)c->data);
-
- c->data = NULL;
- c->alloc = 0;
- c->len = 0;
-}
-
-inline static void chunk_trim(chunk *c)
-{
- while (c->len && isspace(c->data[0])) {
- c->data++;
- c->len--;
- }
-
- while (c->len > 0) {
- if (!isspace(c->data[c->len - 1]))
- break;
- c->len--;
- }
+ chunk_rtrim(&e->input);
}
-inline static unsigned char *chunk_to_cstr(chunk *c)
+static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap)
{
- unsigned char *str;
-
- str = malloc(c->len + 1);
- memcpy(str, c->data, c->len);
- str[c->len] = 0;
+ e->input.data = chunk->data;
+ e->input.len = chunk->len;
+ e->input.alloc = 0;
+ e->pos = 0;
+ e->label_nestlevel = 0;
+ e->reference_map = refmap;
- return str;
+ chunk_rtrim(&e->input);
}
-inline static chunk chunk_literal(const char *data)
+inline static int isbacktick(int c)
{
- chunk c = {data, data ? strlen(data) : 0, 0};
- return c;
+ return (c == '`');
}
-inline static chunk chunk_buf(const gh_buf *buf, int pos, int len)
+static inline unsigned char peek_char(subject *subj)
{
- chunk c = {buf->ptr + pos, len, 0};
- return c;
+ return (subj->pos < subj->input.len) ? subj->input.data[subj->pos] : 0;
}
-inline static chunk chunk_buf_detach(gh_buf *buf)
+static inline unsigned char peek_at(subject *subj, int pos)
{
- chunk c;
-
- c.len = buf->size;
- c.data = gh_buf_detach(buf);
- c.alloc = 1;
-
- return c;
+ return subj->input.data[pos];
}
-// Return the next character in the subject, without advancing.
-// Return 0 if at the end of the subject.
-#define peek_char(subj) gh_buf_at((subj)->buffer, (subj)->pos)
-
// Return true if there are more characters in the subject.
inline static int is_eof(subject* subj)
{
- return (subj->pos >= gh_buf_len(subj->buffer));
+ return (subj->pos >= subj->input.len);
}
// Advance the subject. Doesn't check for eof.
@@ -308,7 +276,7 @@ inline static chunk take_while(subject* subj, int (*f)(int))
len++;
}
- return chunk_buf(subj->buffer, startpos, len);
+ return chunk_dup(&subj->input, startpos, len);
}
// Try to process a backtick code span that began with a
@@ -388,7 +356,7 @@ static inl* handle_backticks(subject *subj)
} else {
gh_buf buf = GH_BUF_INIT;
- gh_buf_set(&buf, subj->buffer->ptr + startpos, endpos - startpos - openticks.len);
+ gh_buf_set(&buf, subj->input.data + startpos, endpos - startpos - openticks.len);
gh_buf_trim(&buf);
normalize_whitespace(&buf);
@@ -404,7 +372,7 @@ static int scan_delims(subject* subj, char c, bool * can_open, bool * can_close)
char char_before, char_after;
int startpos = subj->pos;
- char_before = subj->pos == 0 ? '\n' : gh_buf_at(subj->buffer, subj->pos - 1);
+ char_before = subj->pos == 0 ? '\n' : peek_at(subj, subj->pos - 1);
while (peek_char(subj) == c) {
numdelims++;
advance(subj);
@@ -439,7 +407,7 @@ static inl* handle_strong_emph(subject* subj, char c)
numdelims = scan_delims(subj, c, &can_open, &can_close);
subj->pos += numdelims;
- new = make_str(chunk_buf(subj->buffer, subj->pos - numdelims, numdelims));
+ new = make_str(chunk_dup(&subj->input, subj->pos - numdelims, numdelims));
*last = new;
first_head = new;
result = new;
@@ -488,7 +456,7 @@ static inl* handle_strong_emph(subject* subj, char c)
numdelims = scan_delims(subj, c, &can_open, &can_close);
if (can_close && numdelims >= 1 && numdelims <= 3 &&
numdelims != first_close_delims) {
- new = make_str(chunk_buf(subj->buffer, subj->pos, numdelims));
+ new = make_str(chunk_dup(&subj->input, subj->pos, numdelims));
append_inlines(*last, new);
*last = new;
if (first_close_delims == 1 && numdelims > 2) {
@@ -554,7 +522,7 @@ static inl* handle_backslash(subject *subj)
unsigned char nextchar = peek_char(subj);
if (ispunct(nextchar)) { // only ascii symbols and newline can be escaped
advance(subj);
- return make_str(chunk_buf(subj->buffer, subj->pos - 1, 1));
+ return make_str(chunk_dup(&subj->input, subj->pos - 1, 1));
} else if (nextchar == '\n') {
advance(subj);
return make_linebreak();
@@ -569,9 +537,9 @@ static inl* handle_entity(subject* subj)
{
int match;
inl *result;
- match = scan_entity(subj->buffer, subj->pos);
+ match = scan_entity(&subj->input, subj->pos);
if (match) {
- result = make_entity(chunk_buf(subj->buffer, subj->pos, match));
+ result = make_entity(chunk_dup(&subj->input, subj->pos, match));
subj->pos += match;
} else {
advance(subj);
@@ -584,15 +552,13 @@ static inl* handle_entity(subject* subj)
// Returns an inline sequence consisting of str and entity elements.
static inl *make_str_with_entities(chunk *content)
{
- inl * result = NULL;
- inl * new;
+ inl *result = NULL;
+ inl *new;
int searchpos;
char c;
subject subj;
- gh_buf content_buf = GH_BUF_INIT;
- gh_buf_set(&content_buf, content->data, content->len);
- init_subject(&subj, &content_buf, 0, NULL);
+ subject_from_chunk(&subj, content, NULL);
while ((c = peek_char(&subj))) {
switch (c) {
@@ -600,18 +566,13 @@ static inl *make_str_with_entities(chunk *content)
new = handle_entity(&subj);
break;
default:
- searchpos = gh_buf_strchr(subj.buffer, '&', subj.pos);
- if (searchpos < 0) {
- searchpos = gh_buf_len(subj.buffer);
- }
-
- new = make_str(chunk_buf(subj.buffer, subj.pos, searchpos - subj.pos));
+ searchpos = chunk_strchr(&subj.input, '&', subj.pos);
+ new = make_str(chunk_dup(&subj.input, subj.pos, searchpos - subj.pos));
subj.pos = searchpos;
}
result = append_inlines(result, new);
}
- gh_buf_free(&content_buf);
return result;
}
@@ -678,9 +639,9 @@ static inl* handle_pointy_brace(subject* subj)
advance(subj); // advance past first <
// first try to match a URL autolink
- matchlen = scan_autolink_uri(subj->buffer, subj->pos);
+ matchlen = scan_autolink_uri(&subj->input, subj->pos);
if (matchlen > 0) {
- contents = chunk_buf(subj->buffer, subj->pos, matchlen - 1);
+ contents = chunk_dup(&subj->input, subj->pos, matchlen - 1);
subj->pos += matchlen;
return make_link(
@@ -691,11 +652,11 @@ static inl* handle_pointy_brace(subject* subj)
}
// next try to match an email autolink
- matchlen = scan_autolink_email(subj->buffer, subj->pos);
+ matchlen = scan_autolink_email(&subj->input, subj->pos);
if (matchlen > 0) {
gh_buf mail_url = GH_BUF_INIT;
- contents = chunk_buf(subj->buffer, subj->pos, matchlen - 1);
+ contents = chunk_dup(&subj->input, subj->pos, matchlen - 1);
subj->pos += matchlen;
gh_buf_puts(&mail_url, "mailto:");
@@ -709,9 +670,9 @@ static inl* handle_pointy_brace(subject* subj)
}
// finally, try to match an html tag
- matchlen = scan_html_tag(subj->buffer, subj->pos);
+ matchlen = scan_html_tag(&subj->input, subj->pos);
if (matchlen > 0) {
- contents = chunk_buf(subj->buffer, subj->pos - 1, matchlen + 1);
+ contents = chunk_dup(&subj->input, subj->pos - 1, matchlen + 1);
subj->pos += matchlen;
return make_raw_html(contents);
}
@@ -776,12 +737,7 @@ static int link_label(subject* subj, chunk *raw_label)
}
}
if (c == ']') {
- *raw_label = chunk_buf(
- subj->buffer,
- startpos + 1,
- subj->pos - (startpos + 1)
- );
-
+ *raw_label = chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1));
subj->label_nestlevel = 0;
advance(subj); // advance past ]
return 1;
@@ -813,25 +769,25 @@ static inl* handle_left_bracket(subject* subj)
if (found_label) {
if (peek_char(subj) == '(' &&
- ((sps = scan_spacechars(subj->buffer, subj->pos + 1)) > -1) &&
- ((n = scan_link_url(subj->buffer, subj->pos + 1 + sps)) > -1)) {
+ ((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) &&
+ ((n = scan_link_url(&subj->input, subj->pos + 1 + sps)) > -1)) {
// try to parse an explicit link:
starturl = subj->pos + 1 + sps; // after (
endurl = starturl + n;
- starttitle = endurl + scan_spacechars(subj->buffer, endurl);
+ starttitle = endurl + scan_spacechars(&subj->input, endurl);
// ensure there are spaces btw url and title
endtitle = (starttitle == endurl) ? starttitle :
- starttitle + scan_link_title(subj->buffer, starttitle);
+ starttitle + scan_link_title(&subj->input, starttitle);
- endall = endtitle + scan_spacechars(subj->buffer, endtitle);
+ endall = endtitle + scan_spacechars(&subj->input, endtitle);
- if (gh_buf_at(subj->buffer, endall) == ')') {
+ if (peek_at(subj, endall) == ')') {
subj->pos = endall + 1;
- url = chunk_buf(subj->buffer, starturl, endurl - starturl);
- title = chunk_buf(subj->buffer, starttitle, endtitle - starttitle);
+ url = chunk_dup(&subj->input, starturl, endurl - starturl);
+ title = chunk_dup(&subj->input, starttitle, endtitle - starttitle);
lab = parse_chunk_inlines(&rawlabel, NULL);
return make_link(lab, url, title);
@@ -850,7 +806,7 @@ static inl* handle_left_bracket(subject* subj)
// Check for reference link.
// First, see if there's another label:
- subj->pos = subj->pos + scan_spacechars(subj->buffer, endlabel);
+ subj->pos = subj->pos + scan_spacechars(&subj->input, endlabel);
reflabel = rawlabel;
// if followed by a nonempty link label, we change reflabel to it:
@@ -892,8 +848,8 @@ static inl* handle_newline(subject *subj)
advance(subj);
}
if (nlpos > 1 &&
- gh_buf_at(subj->buffer, nlpos - 1) == ' ' &&
- gh_buf_at(subj->buffer, nlpos - 2) == ' ') {
+ peek_at(subj, nlpos - 1) == ' ' &&
+ peek_at(subj, nlpos - 2) == ' ') {
return make_linebreak();
} else {
return make_softbreak();
@@ -917,30 +873,22 @@ extern inl* parse_inlines_while(subject* subj, int (*f)(subject*))
inl *parse_chunk_inlines(chunk *chunk, reference** refmap)
{
- inl *result;
subject subj;
- gh_buf full_chunk = GH_BUF_INIT;
-
- gh_buf_set(&full_chunk, chunk->data, chunk->len);
- init_subject(&subj, &full_chunk, 0, refmap);
- result = parse_inlines_while(&subj, not_eof);
-
- gh_buf_free(&full_chunk);
- return result;
+ subject_from_chunk(&subj, chunk, refmap);
+ return parse_inlines_while(&subj, not_eof);
}
-static int find_special_char(subject *subj)
+static int subject_find_special_char(subject *subj)
{
int n = subj->pos + 1;
- int size = (int)gh_buf_len(subj->buffer);
- while (n < size) {
- if (strchr("\n\\`&_*[]buffer, n)))
+ while (n < subj->input.len) {
+ if (strchr("\n\\`&_*[]input.data[n]))
return n;
n++;
}
- return -1;
+ return subj->input.len;
}
// Parse an inline, advancing subject, and add it to last element.
@@ -973,11 +921,13 @@ static int parse_inline(subject* subj, inl ** last)
new = handle_pointy_brace(subj);
break;
case '_':
- if (subj->pos > 0 && (isalnum(gh_buf_at(subj->buffer, subj->pos - 1)) ||
- gh_buf_at(subj->buffer, subj->pos - 1) == '_')) {
- new = make_str(chunk_literal("_"));
- advance(subj);
- break;
+ if (subj->pos > 0) {
+ unsigned char prev = peek_at(subj, subj->pos - 1);
+ if (isalnum(prev) || prev == '_') {
+ new = make_str(chunk_literal("_"));
+ advance(subj);
+ break;
+ }
}
new = handle_strong_emph(subj, '_');
@@ -1002,18 +952,13 @@ static int parse_inline(subject* subj, inl ** last)
}
break;
default:
- text_literal:
- endpos = find_special_char(subj);
- if (endpos < 0) {
- endpos = gh_buf_len(subj->buffer);
- }
-
- contents = chunk_buf(subj->buffer, subj->pos, endpos - subj->pos);
+ endpos = subject_find_special_char(subj);
+ contents = chunk_dup(&subj->input, subj->pos, endpos - subj->pos);
subj->pos = endpos;
// if we're at a newline, strip trailing spaces.
if (peek_char(subj) == '\n') {
- chunk_trim(&contents);
+ chunk_rtrim(&contents);
}
new = make_str(contents);
@@ -1026,10 +971,10 @@ static int parse_inline(subject* subj, inl ** last)
return 1;
}
-extern inl* parse_inlines(gh_buf *input, int input_pos, reference** refmap)
+extern inl* parse_inlines(gh_buf *input, reference** refmap)
{
subject subj;
- init_subject(&subj, input, input_pos, refmap);
+ subject_from_buf(&subj, input, refmap);
return parse_inlines_while(&subj, not_eof);
}
@@ -1048,7 +993,7 @@ void spnl(subject* subj)
// Modify refmap if a reference is encountered.
// Return 0 if no reference found, otherwise position of subject
// after reference is parsed.
-extern int parse_reference(gh_buf *input, int input_pos, reference** refmap)
+extern int parse_reference(gh_buf *input, reference** refmap)
{
subject subj;
@@ -1058,9 +1003,9 @@ extern int parse_reference(gh_buf *input, int input_pos, reference** refmap)
int matchlen = 0;
int beforetitle;
- reference * new = NULL;
+ reference *new = NULL;
- init_subject(&subj, input, input_pos, NULL);
+ subject_from_buf(&subj, input, NULL);
// parse label:
if (!link_label(&subj, &lab))
@@ -1075,9 +1020,9 @@ extern int parse_reference(gh_buf *input, int input_pos, reference** refmap)
// parse link url:
spnl(&subj);
- matchlen = scan_link_url(subj.buffer, subj.pos);
+ matchlen = scan_link_url(&subj.input, subj.pos);
if (matchlen) {
- url = chunk_buf(subj.buffer, subj.pos, matchlen);
+ url = chunk_dup(&subj.input, subj.pos, matchlen);
subj.pos += matchlen;
} else {
return 0;
@@ -1086,9 +1031,9 @@ extern int parse_reference(gh_buf *input, int input_pos, reference** refmap)
// parse optional link_title
beforetitle = subj.pos;
spnl(&subj);
- matchlen = scan_link_title(subj.buffer, subj.pos);
+ matchlen = scan_link_title(&subj.input, subj.pos);
if (matchlen) {
- title = chunk_buf(subj.buffer, subj.pos, matchlen);
+ title = chunk_dup(&subj.input, subj.pos, matchlen);
subj.pos += matchlen;
} else {
subj.pos = beforetitle;
diff --git a/src/print.c b/src/print.c
index 0a87925..c262995 100644
--- a/src/print.c
+++ b/src/print.c
@@ -9,7 +9,7 @@ static void print_str(const unsigned char *s, int len)
int i;
if (len < 0)
- len = strlen(s);
+ len = strlen((char *)s);
putchar('"');
for (i = 0; i < len; ++i) {
diff --git a/src/scanners.h b/src/scanners.h
index b6e586b..f96c42d 100644
--- a/src/scanners.h
+++ b/src/scanners.h
@@ -1,15 +1,15 @@
-#include "buffer.h"
+#include "stmd.h"
-int scan_autolink_uri(const gh_buf *s, int pos);
-int scan_autolink_email(const gh_buf *s, int pos);
-int scan_html_tag(const gh_buf *s, int pos);
-int scan_html_block_tag(const gh_buf *s, int pos);
-int scan_link_url(const gh_buf *s, int pos);
-int scan_link_title(const gh_buf *s, int pos);
-int scan_spacechars(const gh_buf *s, int pos);
-int scan_atx_header_start(const gh_buf *s, int pos);
-int scan_setext_header_line(const gh_buf *s, int pos);
-int scan_hrule(const gh_buf *s, int pos);
-int scan_open_code_fence(const gh_buf *s, int pos);
-int scan_close_code_fence(const gh_buf *s, int pos, int len);
-int scan_entity(const gh_buf *s, int pos);
+int scan_autolink_uri(chunk *c, int offset);
+int scan_autolink_email(chunk *c, int offset);
+int scan_html_tag(chunk *c, int offset);
+int scan_html_block_tag(chunk *c, int offset);
+int scan_link_url(chunk *c, int offset);
+int scan_link_title(chunk *c, int offset);
+int scan_spacechars(chunk *c, int offset);
+int scan_atx_header_start(chunk *c, int offset);
+int scan_setext_header_line(chunk *c, int offset);
+int scan_hrule(chunk *c, int offset);
+int scan_open_code_fence(chunk *c, int offset);
+int scan_close_code_fence(chunk *c, int offset, int len);
+int scan_entity(chunk *c, int offset);
diff --git a/src/scanners.re b/src/scanners.re
index 7323ef9..5ac7c15 100644
--- a/src/scanners.re
+++ b/src/scanners.re
@@ -1,8 +1,15 @@
-#include "buffer.h"
+#include "scanners.h"
+
+#define SCAN_DATA \
+ const unsigned char *marker = NULL; \
+ const unsigned char *p = c->data + offset; \
+ const unsigned char *start = p; \
+ const unsigned char *end = c->data + c->len
/*!re2c
re2c:define:YYCTYPE = "unsigned char";
re2c:define:YYCURSOR = p;
+ re2c:define:YYLIMIT = end;
re2c:define:YYMARKER = marker;
re2c:define:YYCTXMARKER = marker;
re2c:yyfill:enable = 0;
@@ -55,11 +62,9 @@
*/
// Try to match URI autolink after first <, returning number of chars matched.
-extern int scan_autolink_uri(const gh_buf *s, int pos)
+extern int scan_autolink_uri(chunk *c, int offset)
{
- unsigned char * marker = NULL;
- unsigned char * p = &(s->ptr[pos]);
- unsigned char * start = p;
+ SCAN_DATA;
/*!re2c
scheme [:]([^\x00-\x20<>\\]|escaped_char)*[>] { return (p - start); }
.? { return 0; }
@@ -67,11 +72,9 @@ extern int scan_autolink_uri(const gh_buf *s, int pos)
}
// Try to match email autolink after first <, returning num of chars matched.
-extern int scan_autolink_email(const gh_buf *s, int pos)
+extern int scan_autolink_email(chunk *c, int offset)
{
- unsigned char * marker = NULL;
- unsigned char * p = &(s->ptr[pos]);
- unsigned char * start = p;
+ SCAN_DATA;
/*!re2c
[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+
[@]
@@ -83,11 +86,9 @@ extern int scan_autolink_email(const gh_buf *s, int pos)
}
// Try to match an HTML tag after first <, returning num of chars matched.
-extern int scan_html_tag(const gh_buf *s, int pos)
+extern int scan_html_tag(chunk *c, int offset)
{
- unsigned char * marker = NULL;
- unsigned char * p = &(s->ptr[pos]);
- unsigned char * start = p;
+ SCAN_DATA;
/*!re2c
htmltag { return (p - start); }
.? { return 0; }
@@ -96,11 +97,9 @@ extern int scan_html_tag(const gh_buf *s, int pos)
// Try to match an HTML block tag including first <,
// returning num of chars matched.
-extern int scan_html_block_tag(const gh_buf *s, int pos)
+extern int scan_html_block_tag(chunk *c, int offset)
{
- unsigned char * marker = NULL;
- unsigned char * p = &(s->ptr[pos]);
- unsigned char * start = p;
+ SCAN_DATA;
/*!re2c
[<] [/] blocktagname (spacechar | [>]) { return (p - start); }
[<] blocktagname (spacechar | [/>]) { return (p - start); }
@@ -113,11 +112,9 @@ extern int scan_html_block_tag(const gh_buf *s, int pos)
// This may optionally be contained in <..>; otherwise
// whitespace and unbalanced right parentheses aren't allowed.
// Newlines aren't ever allowed.
-extern int scan_link_url(const gh_buf *s, int pos)
+extern int scan_link_url(chunk *c, int offset)
{
- unsigned char * marker = NULL;
- unsigned char * p = &(s->ptr[pos]);
- unsigned char * start = p;
+ SCAN_DATA;
/*!re2c
[ \n]* [<] ([^<>\n\\\x00] | escaped_char | [\\])* [>] { return (p - start); }
[ \n]* (reg_char+ | escaped_char | in_parens_nosp)* { return (p - start); }
@@ -128,11 +125,9 @@ extern int scan_link_url(const gh_buf *s, int pos)
// Try to match a link title (in single quotes, in double quotes, or
// in parentheses), returning number of chars matched. Allow one
// level of internal nesting (quotes within quotes).
-extern int scan_link_title(const gh_buf *s, int pos)
+extern int scan_link_title(chunk *c, int offset)
{
- unsigned char * marker = NULL;
- unsigned char * p = &(s->ptr[pos]);
- unsigned char * start = p;
+ SCAN_DATA;
/*!re2c
["] (escaped_char|[^"\x00])* ["] { return (p - start); }
['] (escaped_char|[^'\x00])* ['] { return (p - start); }
@@ -142,10 +137,9 @@ extern int scan_link_title(const gh_buf *s, int pos)
}
// Match space characters, including newlines.
-extern int scan_spacechars(const gh_buf *s, int pos)
+extern int scan_spacechars(chunk *c, int offset)
{
- unsigned char * p = &(s->ptr[pos]);
- unsigned char * start = p;
+ SCAN_DATA;
/*!re2c
[ \t\n]* { return (p - start); }
. { return 0; }
@@ -153,11 +147,9 @@ extern int scan_spacechars(const gh_buf *s, int pos)
}
// Match ATX header start.
-extern int scan_atx_header_start(const gh_buf *s, int pos)
+extern int scan_atx_header_start(chunk *c, int offset)
{
- unsigned char * marker = NULL;
- unsigned char * p = &(s->ptr[pos]);
- unsigned char * start = p;
+ SCAN_DATA;
/*!re2c
[#]{1,6} ([ ]+|[\n]) { return (p - start); }
.? { return 0; }
@@ -166,10 +158,9 @@ extern int scan_atx_header_start(const gh_buf *s, int pos)
// Match sexext header line. Return 1 for level-1 header,
// 2 for level-2, 0 for no match.
-extern int scan_setext_header_line(const gh_buf *s, int pos)
+extern int scan_setext_header_line(chunk *c, int offset)
{
- unsigned char * marker = NULL;
- unsigned char * p = &(s->ptr[pos]);
+ SCAN_DATA;
/*!re2c
[=]+ [ ]* [\n] { return 1; }
[-]+ [ ]* [\n] { return 2; }
@@ -180,11 +171,9 @@ extern int scan_setext_header_line(const gh_buf *s, int pos)
// Scan a horizontal rule line: "...three or more hyphens, asterisks,
// or underscores on a line by themselves. If you wish, you may use
// spaces between the hyphens or asterisks."
-extern int scan_hrule(const gh_buf *s, int pos)
+extern int scan_hrule(chunk *c, int offset)
{
- unsigned char * marker = NULL;
- unsigned char * p = &(s->ptr[pos]);
- unsigned char * start = p;
+ SCAN_DATA;
/*!re2c
([*][ ]*){3,} [ \t]* [\n] { return (p - start); }
([_][ ]*){3,} [ \t]* [\n] { return (p - start); }
@@ -194,11 +183,9 @@ extern int scan_hrule(const gh_buf *s, int pos)
}
// Scan an opening code fence.
-extern int scan_open_code_fence(const gh_buf *s, int pos)
+extern int scan_open_code_fence(chunk *c, int offset)
{
- unsigned char * marker = NULL;
- unsigned char * p = &(s->ptr[pos]);
- unsigned char * start = p;
+ SCAN_DATA;
/*!re2c
[`]{3,} / [^`\n\x00]*[\n] { return (p - start); }
[~]{3,} / [^~\n\x00]*[\n] { return (p - start); }
@@ -207,11 +194,9 @@ extern int scan_open_code_fence(const gh_buf *s, int pos)
}
// Scan a closing code fence with length at least len.
-extern int scan_close_code_fence(const gh_buf *s, int pos, int len)
+extern int scan_close_code_fence(chunk *c, int offset, int len)
{
- unsigned char * marker = NULL;
- unsigned char * p = &(s->ptr[pos]);
- unsigned char * start = p;
+ SCAN_DATA;
/*!re2c
([`]{3,} | [~]{3,}) / spacechar* [\n]
{ if (p - start > len) {
@@ -225,11 +210,9 @@ extern int scan_close_code_fence(const gh_buf *s, int pos, int len)
// Scans an entity.
// Returns number of chars matched.
-extern int scan_entity(const gh_buf *s, int pos)
+extern int scan_entity(chunk *c, int offset)
{
- unsigned char * marker = NULL;
- unsigned char * p = &(s->ptr[pos]);
- unsigned char * start = p;
+ SCAN_DATA;
/*!re2c
[&] ([#] ([Xx][A-Fa-f0-9]{1,8}|[0-9]{1,8}) |[A-Za-z][A-Za-z0-9]{1,31} ) [;]
{ return (p - start); }
diff --git a/src/stmd.h b/src/stmd.h
index 3e284bd..4a3c399 100644
--- a/src/stmd.h
+++ b/src/stmd.h
@@ -1,17 +1,15 @@
+#ifndef _STDMD_H_
+#define _STDMD_H_
+
#include
#include
#include "buffer.h"
+#include "chunk.h"
#include "uthash.h"
#define VERSION "0.1"
#define CODE_INDENT 4
-typedef struct {
- const unsigned char *data;
- int len;
- int alloc;
-} chunk;
-
typedef struct Inline {
enum { INL_STRING, INL_SOFTBREAK, INL_LINEBREAK, INL_CODE, INL_RAW_HTML, INL_ENTITY,
INL_EMPH, INL_STRONG, INL_LINK, INL_IMAGE } tag;
@@ -79,7 +77,6 @@ typedef struct Block {
struct Block* parent;
struct Block* top;
gh_buf string_content;
- int string_pos;
inl* inline_content;
union {
struct ListData list_data;
@@ -91,10 +88,10 @@ typedef struct Block {
struct Block * prev;
} block;
-inl* parse_inlines(gh_buf *input, int input_pos, reference** refmap);
+inl* parse_inlines(gh_buf *input, reference** refmap);
void free_inlines(inl* e);
-int parse_reference(gh_buf *input, int input_pos, reference** refmap);
+int parse_reference(gh_buf *input, reference** refmap);
void free_reference(reference *ref);
void free_reference_map(reference **refmap);
@@ -117,3 +114,4 @@ void inlines_to_html(gh_buf *html, inl *b);
void utf8proc_case_fold(gh_buf *dest, const unsigned char *str, int len);
+#endif
--
cgit v1.2.3
From 543c2c94d71adee42c7bd2f8027d75c87ed8120d Mon Sep 17 00:00:00 2001
From: Vicent Marti
Date: Thu, 4 Sep 2014 18:38:14 +0200
Subject: Rename to strbuf
---
src/blocks.c | 64 +++++++++++++++----------------
src/buffer.c | 86 ++++++++++++++++++++---------------------
src/buffer.h | 80 +++++++++++++++++++-------------------
src/chunk.h | 4 +-
src/html/houdini.h | 22 +++++------
src/html/houdini_href_e.c | 12 +++---
src/html/houdini_html_e.c | 10 ++---
src/html/html.c | 98 +++++++++++++++++++++++------------------------
src/inlines.c | 50 ++++++++++++------------
src/main.c | 4 +-
src/stmd.h | 16 ++++----
src/utf8.c | 6 +--
12 files changed, 226 insertions(+), 226 deletions(-)
(limited to 'src/buffer.h')
diff --git a/src/blocks.c b/src/blocks.c
index cf0e9e4..9faccd9 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -10,7 +10,7 @@
#define peek_at(i, n) (i)->data[n]
-static void incorporate_line(gh_buf *ln, int line_number, block** curptr);
+static void incorporate_line(strbuf *ln, int line_number, block** curptr);
static void finalize(block* b, int line_number);
static block* make_block(int tag, int start_line, int start_column)
@@ -28,7 +28,7 @@ static block* make_block(int tag, int start_line, int start_column)
e->parent = NULL;
e->top = NULL;
e->attributes.refmap = NULL;
- gh_buf_init(&e->string_content, 32);
+ strbuf_init(&e->string_content, 32);
e->inline_content = NULL;
e->next = NULL;
e->prev = NULL;
@@ -49,7 +49,7 @@ extern block* make_document()
}
// Returns true if line has only space characters, else false.
-bool is_blank(gh_buf *s, int offset)
+bool is_blank(strbuf *s, int offset)
{
while (offset < s->size) {
switch (s->ptr[offset]) {
@@ -85,10 +85,10 @@ static inline bool accepts_lines(int block_type)
static void add_line(block* block, chunk *ch, int offset)
{
assert(block->open);
- gh_buf_put(&block->string_content, ch->data + offset, ch->len - offset);
+ strbuf_put(&block->string_content, ch->data + offset, ch->len - offset);
}
-static void remove_trailing_blank_lines(gh_buf *ln)
+static void remove_trailing_blank_lines(strbuf *ln)
{
int i;
@@ -100,13 +100,13 @@ static void remove_trailing_blank_lines(gh_buf *ln)
}
if (i < 0) {
- gh_buf_clear(ln);
+ strbuf_clear(ln);
return;
}
- i = gh_buf_strchr(ln, '\n', i);
+ i = strbuf_strchr(ln, '\n', i);
if (i >= 0)
- gh_buf_truncate(ln, i);
+ strbuf_truncate(ln, i);
}
// Check to see if a block ends with a blank line, descending
@@ -164,10 +164,10 @@ static void finalize(block* b, int line_number)
switch (b->tag) {
case paragraph:
pos = 0;
- while (gh_buf_at(&b->string_content, 0) == '[' &&
+ while (strbuf_at(&b->string_content, 0) == '[' &&
(pos = parse_reference(&b->string_content, b->top->attributes.refmap))) {
- gh_buf_drop(&b->string_content, pos);
+ strbuf_drop(&b->string_content, pos);
}
if (is_blank(&b->string_content, 0)) {
b->tag = reference_def;
@@ -176,23 +176,23 @@ static void finalize(block* b, int line_number)
case indented_code:
remove_trailing_blank_lines(&b->string_content);
- gh_buf_putc(&b->string_content, '\n');
+ strbuf_putc(&b->string_content, '\n');
break;
case fenced_code:
// first line of contents becomes info
- firstlinelen = gh_buf_strchr(&b->string_content, '\n', 0);
+ firstlinelen = strbuf_strchr(&b->string_content, '\n', 0);
- gh_buf_init(&b->attributes.fenced_code_data.info, 0);
- gh_buf_set(
+ strbuf_init(&b->attributes.fenced_code_data.info, 0);
+ strbuf_set(
&b->attributes.fenced_code_data.info,
b->string_content.ptr,
firstlinelen
);
- gh_buf_drop(&b->string_content, firstlinelen + 1);
+ strbuf_drop(&b->string_content, firstlinelen + 1);
- gh_buf_trim(&b->attributes.fenced_code_data.info);
+ strbuf_trim(&b->attributes.fenced_code_data.info);
unescape_buffer(&b->attributes.fenced_code_data.info);
break;
@@ -265,9 +265,9 @@ extern void free_blocks(block* e)
while (e != NULL) {
next = e->next;
free_inlines(e->inline_content);
- gh_buf_free(&e->string_content);
+ strbuf_free(&e->string_content);
if (e->tag == fenced_code) {
- gh_buf_free(&e->attributes.fenced_code_data.info);
+ strbuf_free(&e->attributes.fenced_code_data.info);
} else if (e->tag == document) {
free_reference_map(e->attributes.refmap);
}
@@ -287,7 +287,7 @@ void process_inlines(block* cur, reference** refmap)
case setext_header:
cur->inline_content = parse_inlines(&cur->string_content, refmap);
// MEM
- // gh_buf_free(&cur->string_content);
+ // strbuf_free(&cur->string_content);
break;
default:
@@ -369,7 +369,7 @@ static int lists_match(struct ListData list_data,
list_data.bullet_char == item_data.bullet_char);
}
-static void expand_tabs(gh_buf *ob, const unsigned char *line, size_t size)
+static void expand_tabs(strbuf *ob, const unsigned char *line, size_t size)
{
size_t i = 0, tab = 0;
@@ -381,13 +381,13 @@ static void expand_tabs(gh_buf *ob, const unsigned char *line, size_t size)
}
if (i > org)
- gh_buf_put(ob, line + org, i - org);
+ strbuf_put(ob, line + org, i - org);
if (i >= size)
break;
do {
- gh_buf_putc(ob, ' '); tab++;
+ strbuf_putc(ob, ' '); tab++;
} while (tab % 4);
i++;
@@ -409,7 +409,7 @@ static block *finalize_document(block *document, int linenum)
extern block *stmd_parse_file(FILE *f)
{
- gh_buf line = GH_BUF_INIT;
+ strbuf line = GH_BUF_INIT;
unsigned char buffer[4096];
int linenum = 1;
block *document = make_document();
@@ -417,17 +417,17 @@ extern block *stmd_parse_file(FILE *f)
while (fgets((char *)buffer, sizeof(buffer), f)) {
expand_tabs(&line, buffer, strlen((char *)buffer));
incorporate_line(&line, linenum, &document);
- gh_buf_clear(&line);
+ strbuf_clear(&line);
linenum++;
}
- gh_buf_free(&line);
+ strbuf_free(&line);
return finalize_document(document, linenum);
}
extern block *stmd_parse_document(const unsigned char *buffer, size_t len)
{
- gh_buf line = GH_BUF_INIT;
+ strbuf line = GH_BUF_INIT;
int linenum = 1;
const unsigned char *end = buffer + len;
block *document = make_document();
@@ -444,11 +444,11 @@ extern block *stmd_parse_document(const unsigned char *buffer, size_t len)
}
incorporate_line(&line, linenum, &document);
- gh_buf_clear(&line);
+ strbuf_clear(&line);
linenum++;
}
- gh_buf_free(&line);
+ strbuf_free(&line);
return finalize_document(document, linenum);
}
@@ -471,7 +471,7 @@ static void chop_trailing_hashtags(chunk *ch)
}
// Process one line at a time, modifying a block.
-static void incorporate_line(gh_buf *line, int line_number, block** curptr)
+static void incorporate_line(strbuf *line, int line_number, block** curptr)
{
block* last_matched_container;
int offset = 0;
@@ -639,8 +639,8 @@ static void incorporate_line(gh_buf *line, int line_number, block** curptr)
} else if (container->tag == paragraph &&
(lev = scan_setext_header_line(&input, first_nonspace)) &&
// check that there is only one line in the paragraph:
- gh_buf_strrchr(&container->string_content, '\n',
- gh_buf_len(&container->string_content) - 2) < 0) {
+ strbuf_strrchr(&container->string_content, '\n',
+ strbuf_len(&container->string_content) - 2) < 0) {
container->tag = setext_header;
container->attributes.header_level = lev;
@@ -734,7 +734,7 @@ static void incorporate_line(gh_buf *line, int line_number, block** curptr)
container == last_matched_container &&
!blank &&
cur->tag == paragraph &&
- gh_buf_len(&cur->string_content) > 0) {
+ strbuf_len(&cur->string_content) > 0) {
add_line(cur, &input, offset);
diff --git a/src/buffer.c b/src/buffer.c
index dc4a405..90c2186 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -9,32 +9,32 @@
#include "buffer.h"
-/* Used as default value for gh_buf->ptr so that people can always
- * assume ptr is non-NULL and zero terminated even for new gh_bufs.
+/* Used as default value for strbuf->ptr so that people can always
+ * assume ptr is non-NULL and zero terminated even for new strbufs.
*/
-unsigned char gh_buf__initbuf[1];
-unsigned char gh_buf__oom[1];
+unsigned char strbuf__initbuf[1];
+unsigned char strbuf__oom[1];
#define ENSURE_SIZE(b, d) \
- if ((d) > buf->asize && gh_buf_grow(b, (d)) < 0)\
+ if ((d) > buf->asize && strbuf_grow(b, (d)) < 0)\
return -1;
-void gh_buf_init(gh_buf *buf, int initial_size)
+void strbuf_init(strbuf *buf, int initial_size)
{
buf->asize = 0;
buf->size = 0;
- buf->ptr = gh_buf__initbuf;
+ buf->ptr = strbuf__initbuf;
if (initial_size)
- gh_buf_grow(buf, initial_size);
+ strbuf_grow(buf, initial_size);
}
-int gh_buf_try_grow(gh_buf *buf, int target_size, bool mark_oom)
+int strbuf_try_grow(strbuf *buf, int target_size, bool mark_oom)
{
unsigned char *new_ptr;
int new_size;
- if (buf->ptr == gh_buf__oom)
+ if (buf->ptr == strbuf__oom)
return -1;
if (target_size <= buf->asize)
@@ -60,7 +60,7 @@ int gh_buf_try_grow(gh_buf *buf, int target_size, bool mark_oom)
if (!new_ptr) {
if (mark_oom)
- buf->ptr = gh_buf__oom;
+ buf->ptr = strbuf__oom;
return -1;
}
@@ -75,17 +75,17 @@ int gh_buf_try_grow(gh_buf *buf, int target_size, bool mark_oom)
return 0;
}
-void gh_buf_free(gh_buf *buf)
+void strbuf_free(strbuf *buf)
{
if (!buf) return;
- if (buf->ptr != gh_buf__initbuf && buf->ptr != gh_buf__oom)
+ if (buf->ptr != strbuf__initbuf && buf->ptr != strbuf__oom)
free(buf->ptr);
- gh_buf_init(buf, 0);
+ strbuf_init(buf, 0);
}
-void gh_buf_clear(gh_buf *buf)
+void strbuf_clear(strbuf *buf)
{
buf->size = 0;
@@ -93,10 +93,10 @@ void gh_buf_clear(gh_buf *buf)
buf->ptr[0] = '\0';
}
-int gh_buf_set(gh_buf *buf, const unsigned char *data, int len)
+int strbuf_set(strbuf *buf, const unsigned char *data, int len)
{
if (len <= 0 || data == NULL) {
- gh_buf_clear(buf);
+ strbuf_clear(buf);
} else {
if (data != buf->ptr) {
ENSURE_SIZE(buf, len + 1);
@@ -108,14 +108,14 @@ int gh_buf_set(gh_buf *buf, const unsigned char *data, int len)
return 0;
}
-int gh_buf_sets(gh_buf *buf, const char *string)
+int strbuf_sets(strbuf *buf, const char *string)
{
- return gh_buf_set(buf,
+ return strbuf_set(buf,
(const unsigned char *)string,
string ? strlen(string) : 0);
}
-int gh_buf_putc(gh_buf *buf, int c)
+int strbuf_putc(strbuf *buf, int c)
{
ENSURE_SIZE(buf, buf->size + 2);
buf->ptr[buf->size++] = c;
@@ -123,7 +123,7 @@ int gh_buf_putc(gh_buf *buf, int c)
return 0;
}
-int gh_buf_put(gh_buf *buf, const unsigned char *data, int len)
+int strbuf_put(strbuf *buf, const unsigned char *data, int len)
{
if (len <= 0)
return 0;
@@ -135,12 +135,12 @@ int gh_buf_put(gh_buf *buf, const unsigned char *data, int len)
return 0;
}
-int gh_buf_puts(gh_buf *buf, const char *string)
+int strbuf_puts(strbuf *buf, const char *string)
{
- return gh_buf_put(buf, (const unsigned char *)string, strlen(string));
+ return strbuf_put(buf, (const unsigned char *)string, strlen(string));
}
-int gh_buf_vprintf(gh_buf *buf, const char *format, va_list ap)
+int strbuf_vprintf(strbuf *buf, const char *format, va_list ap)
{
const int expected_size = buf->size + (strlen(format) * 2);
int len;
@@ -159,7 +159,7 @@ int gh_buf_vprintf(gh_buf *buf, const char *format, va_list ap)
if (len < 0) {
free(buf->ptr);
- buf->ptr = gh_buf__oom;
+ buf->ptr = strbuf__oom;
return -1;
}
@@ -174,19 +174,19 @@ int gh_buf_vprintf(gh_buf *buf, const char *format, va_list ap)
return 0;
}
-int gh_buf_printf(gh_buf *buf, const char *format, ...)
+int strbuf_printf(strbuf *buf, const char *format, ...)
{
int r;
va_list ap;
va_start(ap, format);
- r = gh_buf_vprintf(buf, format, ap);
+ r = strbuf_vprintf(buf, format, ap);
va_end(ap);
return r;
}
-void gh_buf_copy_cstr(char *data, int datasize, const gh_buf *buf)
+void strbuf_copy_cstr(char *data, int datasize, const strbuf *buf)
{
int copylen;
@@ -204,28 +204,28 @@ void gh_buf_copy_cstr(char *data, int datasize, const gh_buf *buf)
data[copylen] = '\0';
}
-void gh_buf_swap(gh_buf *buf_a, gh_buf *buf_b)
+void strbuf_swap(strbuf *buf_a, strbuf *buf_b)
{
- gh_buf t = *buf_a;
+ strbuf t = *buf_a;
*buf_a = *buf_b;
*buf_b = t;
}
-unsigned char *gh_buf_detach(gh_buf *buf)
+unsigned char *strbuf_detach(strbuf *buf)
{
unsigned char *data = buf->ptr;
- if (buf->asize == 0 || buf->ptr == gh_buf__oom)
+ if (buf->asize == 0 || buf->ptr == strbuf__oom)
return NULL;
- gh_buf_init(buf, 0);
+ strbuf_init(buf, 0);
return data;
}
-void gh_buf_attach(gh_buf *buf, unsigned char *ptr, int asize)
+void strbuf_attach(strbuf *buf, unsigned char *ptr, int asize)
{
- gh_buf_free(buf);
+ strbuf_free(buf);
if (ptr) {
buf->ptr = ptr;
@@ -235,18 +235,18 @@ void gh_buf_attach(gh_buf *buf, unsigned char *ptr, int asize)
else /* pass 0 to fall back on strlen + 1 */
buf->asize = buf->size + 1;
} else {
- gh_buf_grow(buf, asize);
+ strbuf_grow(buf, asize);
}
}
-int gh_buf_cmp(const gh_buf *a, const gh_buf *b)
+int strbuf_cmp(const strbuf *a, const strbuf *b)
{
int result = memcmp(a->ptr, b->ptr, MIN(a->size, b->size));
return (result != 0) ? result :
(a->size < b->size) ? -1 : (a->size > b->size) ? 1 : 0;
}
-int gh_buf_strchr(const gh_buf *buf, int c, int pos)
+int strbuf_strchr(const strbuf *buf, int c, int pos)
{
const unsigned char *p = memchr(buf->ptr + pos, c, buf->size - pos);
if (!p)
@@ -255,7 +255,7 @@ int gh_buf_strchr(const gh_buf *buf, int c, int pos)
return (int)(p - (const unsigned char *)buf->ptr);
}
-int gh_buf_strrchr(const gh_buf *buf, int c, int pos)
+int strbuf_strrchr(const strbuf *buf, int c, int pos)
{
int i;
@@ -267,7 +267,7 @@ int gh_buf_strrchr(const gh_buf *buf, int c, int pos)
return -1;
}
-void gh_buf_truncate(gh_buf *buf, int len)
+void strbuf_truncate(strbuf *buf, int len)
{
if (len < buf->size) {
buf->size = len;
@@ -275,7 +275,7 @@ void gh_buf_truncate(gh_buf *buf, int len)
}
}
-void gh_buf_drop(gh_buf *buf, int n)
+void strbuf_drop(strbuf *buf, int n)
{
if (n > 0) {
buf->size = buf->size - n;
@@ -286,7 +286,7 @@ void gh_buf_drop(gh_buf *buf, int n)
}
}
-void gh_buf_trim(gh_buf *buf)
+void strbuf_trim(strbuf *buf)
{
int i = 0;
@@ -296,7 +296,7 @@ void gh_buf_trim(gh_buf *buf)
while (i < buf->size && isspace(buf->ptr[i]))
i++;
- gh_buf_drop(buf, i);
+ strbuf_drop(buf, i);
/* rtrim */
while (buf->size > 0) {
diff --git a/src/buffer.h b/src/buffer.h
index 0d5143e..6f45cbb 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -9,20 +9,20 @@
typedef struct {
unsigned char *ptr;
int asize, size;
-} gh_buf;
+} strbuf;
-extern unsigned char gh_buf__initbuf[];
-extern unsigned char gh_buf__oom[];
+extern unsigned char strbuf__initbuf[];
+extern unsigned char strbuf__oom[];
-#define GH_BUF_INIT { gh_buf__initbuf, 0, 0 }
+#define GH_BUF_INIT { strbuf__initbuf, 0, 0 }
/**
- * Initialize a gh_buf structure.
+ * Initialize a strbuf structure.
*
* For the cases where GH_BUF_INIT cannot be used to do static
* initialization.
*/
-extern void gh_buf_init(gh_buf *buf, int initial_size);
+extern void strbuf_init(strbuf *buf, int initial_size);
/**
* Attempt to grow the buffer to hold at least `target_size` bytes.
@@ -32,7 +32,7 @@ extern void gh_buf_init(gh_buf *buf, int initial_size);
* existing buffer content will be preserved, but calling code must handle
* that buffer was not expanded.
*/
-extern int gh_buf_try_grow(gh_buf *buf, int target_size, bool mark_oom);
+extern int strbuf_try_grow(strbuf *buf, int target_size, bool mark_oom);
/**
* Grow the buffer to hold at least `target_size` bytes.
@@ -42,71 +42,71 @@ extern int gh_buf_try_grow(gh_buf *buf, int target_size, bool mark_oom);
*
* @return 0 on success or -1 on failure
*/
-static inline int gh_buf_grow(gh_buf *buf, int target_size)
+static inline int strbuf_grow(strbuf *buf, int target_size)
{
- return gh_buf_try_grow(buf, target_size, true);
+ return strbuf_try_grow(buf, target_size, true);
}
-extern void gh_buf_free(gh_buf *buf);
-extern void gh_buf_swap(gh_buf *buf_a, gh_buf *buf_b);
+extern void strbuf_free(strbuf *buf);
+extern void strbuf_swap(strbuf *buf_a, strbuf *buf_b);
/**
- * Test if there have been any reallocation failures with this gh_buf.
+ * Test if there have been any reallocation failures with this strbuf.
*
- * Any function that writes to a gh_buf can fail due to memory allocation
- * issues. If one fails, the gh_buf will be marked with an OOM error and
- * further calls to modify the buffer will fail. Check gh_buf_oom() at the
+ * Any function that writes to a strbuf can fail due to memory allocation
+ * issues. If one fails, the strbuf will be marked with an OOM error and
+ * further calls to modify the buffer will fail. Check strbuf_oom() at the
* end of your sequence and it will be true if you ran out of memory at any
* point with that buffer.
*
* @return false if no error, true if allocation error
*/
-static inline bool gh_buf_oom(const gh_buf *buf)
+static inline bool strbuf_oom(const strbuf *buf)
{
- return (buf->ptr == gh_buf__oom);
+ return (buf->ptr == strbuf__oom);
}
-static inline size_t gh_buf_len(const gh_buf *buf)
+static inline size_t strbuf_len(const strbuf *buf)
{
return buf->size;
}
-extern int gh_buf_cmp(const gh_buf *a, const gh_buf *b);
+extern int strbuf_cmp(const strbuf *a, const strbuf *b);
-extern void gh_buf_attach(gh_buf *buf, unsigned char *ptr, int asize);
-extern unsigned char *gh_buf_detach(gh_buf *buf);
-extern void gh_buf_copy_cstr(char *data, int datasize, const gh_buf *buf);
+extern void strbuf_attach(strbuf *buf, unsigned char *ptr, int asize);
+extern unsigned char *strbuf_detach(strbuf *buf);
+extern void strbuf_copy_cstr(char *data, int datasize, const strbuf *buf);
-static inline const char *gh_buf_cstr(const gh_buf *buf)
+static inline const char *strbuf_cstr(const strbuf *buf)
{
return (char *)buf->ptr;
}
-#define gh_buf_at(buf, n) ((buf)->ptr[n])
+#define strbuf_at(buf, n) ((buf)->ptr[n])
/*
* Functions below that return int value error codes will return 0 on
* success or -1 on failure (which generally means an allocation failed).
- * Using a gh_buf where the allocation has failed with result in -1 from
+ * Using a strbuf where the allocation has failed with result in -1 from
* all further calls using that buffer. As a result, you can ignore the
* return code of these functions and call them in a series then just call
- * gh_buf_oom at the end.
+ * strbuf_oom at the end.
*/
-extern int gh_buf_set(gh_buf *buf, const unsigned char *data, int len);
-extern int gh_buf_sets(gh_buf *buf, const char *string);
-extern int gh_buf_putc(gh_buf *buf, int c);
-extern int gh_buf_put(gh_buf *buf, const unsigned char *data, int len);
-extern int gh_buf_puts(gh_buf *buf, const char *string);
-extern int gh_buf_printf(gh_buf *buf, const char *format, ...)
+extern int strbuf_set(strbuf *buf, const unsigned char *data, int len);
+extern int strbuf_sets(strbuf *buf, const char *string);
+extern int strbuf_putc(strbuf *buf, int c);
+extern int strbuf_put(strbuf *buf, const unsigned char *data, int len);
+extern int strbuf_puts(strbuf *buf, const char *string);
+extern int strbuf_printf(strbuf *buf, const char *format, ...)
__attribute__((format (printf, 2, 3)));
-extern int gh_buf_vprintf(gh_buf *buf, const char *format, va_list ap);
-extern void gh_buf_clear(gh_buf *buf);
-
-int gh_buf_strchr(const gh_buf *buf, int c, int pos);
-int gh_buf_strrchr(const gh_buf *buf, int c, int pos);
-void gh_buf_drop(gh_buf *buf, int n);
-void gh_buf_truncate(gh_buf *buf, int len);
-void gh_buf_trim(gh_buf *buf);
+extern int strbuf_vprintf(strbuf *buf, const char *format, va_list ap);
+extern void strbuf_clear(strbuf *buf);
+
+int strbuf_strchr(const strbuf *buf, int c, int pos);
+int strbuf_strrchr(const strbuf *buf, int c, int pos);
+void strbuf_drop(strbuf *buf, int n);
+void strbuf_truncate(strbuf *buf, int len);
+void strbuf_trim(strbuf *buf);
#endif
diff --git a/src/chunk.h b/src/chunk.h
index f3841ed..f37a2f3 100644
--- a/src/chunk.h
+++ b/src/chunk.h
@@ -78,12 +78,12 @@ static inline chunk chunk_dup(const chunk *ch, int pos, int len)
return c;
}
-static inline chunk chunk_buf_detach(gh_buf *buf)
+static inline chunk chunk_buf_detach(strbuf *buf)
{
chunk c;
c.len = buf->size;
- c.data = gh_buf_detach(buf);
+ c.data = strbuf_detach(buf);
c.alloc = 1;
return c;
diff --git a/src/html/houdini.h b/src/html/houdini.h
index 31fe917..1e54d20 100644
--- a/src/html/houdini.h
+++ b/src/html/houdini.h
@@ -25,17 +25,17 @@ extern "C" {
#define HOUDINI_ESCAPED_SIZE(x) (((x) * 12) / 10)
#define HOUDINI_UNESCAPED_SIZE(x) (x)
-extern int houdini_escape_html(gh_buf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure);
-extern int houdini_unescape_html(gh_buf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_xml(gh_buf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_uri(gh_buf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_url(gh_buf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size);
-extern int houdini_unescape_uri(gh_buf *ob, const uint8_t *src, size_t size);
-extern int houdini_unescape_url(gh_buf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_js(gh_buf *ob, const uint8_t *src, size_t size);
-extern int houdini_unescape_js(gh_buf *ob, const uint8_t *src, size_t size);
+extern int houdini_escape_html(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_escape_html0(strbuf *ob, const uint8_t *src, size_t size, int secure);
+extern int houdini_unescape_html(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_escape_xml(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_escape_uri(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_escape_url(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_escape_href(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_unescape_uri(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_unescape_url(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_escape_js(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_unescape_js(strbuf *ob, const uint8_t *src, size_t size);
#ifdef __cplusplus
}
diff --git a/src/html/houdini_href_e.c b/src/html/houdini_href_e.c
index b2a7d79..12456ce 100644
--- a/src/html/houdini_href_e.c
+++ b/src/html/houdini_href_e.c
@@ -49,7 +49,7 @@ static const char HREF_SAFE[] = {
};
int
-houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size)
+houdini_escape_href(strbuf *ob, const uint8_t *src, size_t size)
{
static const uint8_t hex_chars[] = "0123456789ABCDEF";
size_t i = 0, org;
@@ -63,7 +63,7 @@ houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size)
i++;
if (likely(i > org))
- gh_buf_put(ob, src + org, i - org);
+ strbuf_put(ob, src + org, i - org);
/* escaping */
if (i >= size)
@@ -73,14 +73,14 @@ houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size)
/* amp appears all the time in URLs, but needs
* HTML-entity escaping to be inside an href */
case '&':
- gh_buf_puts(ob, "&");
+ strbuf_puts(ob, "&");
break;
/* the single quote is a valid URL character
* according to the standard; it needs HTML
* entity escaping too */
case '\'':
- gh_buf_puts(ob, "'");
+ strbuf_puts(ob, "'");
break;
/* the space can be escaped to %20 or a plus
@@ -89,7 +89,7 @@ houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size)
* when building GET strings */
#if 0
case ' ':
- gh_buf_putc(ob, '+');
+ strbuf_putc(ob, '+');
break;
#endif
@@ -97,7 +97,7 @@ houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size)
default:
hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
hex_str[2] = hex_chars[src[i] & 0xF];
- gh_buf_put(ob, hex_str, 3);
+ strbuf_put(ob, hex_str, 3);
}
i++;
diff --git a/src/html/houdini_html_e.c b/src/html/houdini_html_e.c
index 95b6c41..f2e86fe 100644
--- a/src/html/houdini_html_e.c
+++ b/src/html/houdini_html_e.c
@@ -45,7 +45,7 @@ static const char *HTML_ESCAPES[] = {
};
int
-houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure)
+houdini_escape_html0(strbuf *ob, const uint8_t *src, size_t size, int secure)
{
size_t i = 0, org, esc = 0;
@@ -55,7 +55,7 @@ houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure)
i++;
if (i > org)
- gh_buf_put(ob, src + org, i - org);
+ strbuf_put(ob, src + org, i - org);
/* escaping */
if (unlikely(i >= size))
@@ -63,9 +63,9 @@ houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure)
/* The forward slash is only escaped in secure mode */
if ((src[i] == '/' || src[i] == '\'') && !secure) {
- gh_buf_putc(ob, src[i]);
+ strbuf_putc(ob, src[i]);
} else {
- gh_buf_puts(ob, HTML_ESCAPES[esc]);
+ strbuf_puts(ob, HTML_ESCAPES[esc]);
}
i++;
@@ -75,7 +75,7 @@ houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure)
}
int
-houdini_escape_html(gh_buf *ob, const uint8_t *src, size_t size)
+houdini_escape_html(strbuf *ob, const uint8_t *src, size_t size)
{
return houdini_escape_html0(ob, src, size, 1);
}
diff --git a/src/html/html.c b/src/html/html.c
index 41b8fda..a9356dd 100644
--- a/src/html/html.c
+++ b/src/html/html.c
@@ -10,7 +10,7 @@
// Functions to convert block and inline lists to HTML strings.
-static void escape_html(gh_buf *dest, const unsigned char *source, int length)
+static void escape_html(strbuf *dest, const unsigned char *source, int length)
{
if (length < 0)
length = strlen((char *)source);
@@ -18,7 +18,7 @@ static void escape_html(gh_buf *dest, const unsigned char *source, int length)
houdini_escape_html0(dest, source, (size_t)length, 0);
}
-static void escape_href(gh_buf *dest, const unsigned char *source, int length)
+static void escape_href(strbuf *dest, const unsigned char *source, int length)
{
if (length < 0)
length = strlen((char *)source);
@@ -26,14 +26,14 @@ static void escape_href(gh_buf *dest, const unsigned char *source, int length)
houdini_escape_href(dest, source, (size_t)length);
}
-static inline void cr(gh_buf *html)
+static inline void cr(strbuf *html)
{
if (html->size && html->ptr[html->size - 1] != '\n')
- gh_buf_putc(html, '\n');
+ strbuf_putc(html, '\n');
}
// Convert a block list to HTML. Returns 0 on success, and sets result.
-void blocks_to_html(gh_buf *html, block *b, bool tight)
+void blocks_to_html(strbuf *html, block *b, bool tight)
{
struct ListData *data;
@@ -48,25 +48,25 @@ void blocks_to_html(gh_buf *html, block *b, bool tight)
inlines_to_html(html, b->inline_content);
} else {
cr(html);
- gh_buf_puts(html, "");
+ strbuf_puts(html, "
");
inlines_to_html(html, b->inline_content);
- gh_buf_puts(html, "
\n");
+ strbuf_puts(html, "
\n");
}
break;
case block_quote:
cr(html);
- gh_buf_puts(html, "\n");
+ strbuf_puts(html, "\n");
blocks_to_html(html, b->children, false);
- gh_buf_puts(html, "
\n");
+ strbuf_puts(html, "
\n");
break;
case list_item:
cr(html);
- gh_buf_puts(html, "");
+ strbuf_puts(html, "");
blocks_to_html(html, b->children, tight);
- gh_buf_trim(html); /* TODO: rtrim */
- gh_buf_puts(html, "\n");
+ strbuf_trim(html); /* TODO: rtrim */
+ strbuf_puts(html, "\n");
break;
case list:
@@ -75,58 +75,58 @@ void blocks_to_html(gh_buf *html, block *b, bool tight)
data = &(b->attributes.list_data);
if (data->start > 1) {
- gh_buf_printf(html, "<%s start=\"%d\">\n",
+ strbuf_printf(html, "<%s start=\"%d\">\n",
data->list_type == bullet ? "ul" : "ol",
data->start);
} else {
- gh_buf_puts(html, data->list_type == bullet ? "\n" : "\n");
+ strbuf_puts(html, data->list_type == bullet ? "\n" : "\n");
}
blocks_to_html(html, b->children, data->tight);
- gh_buf_puts(html, data->list_type == bullet ? "
" : "
");
- gh_buf_putc(html, '\n');
+ strbuf_puts(html, data->list_type == bullet ? "
" : "");
+ strbuf_putc(html, '\n');
break;
case atx_header:
case setext_header:
cr(html);
- gh_buf_printf(html, "", b->attributes.header_level);
+ strbuf_printf(html, "", b->attributes.header_level);
inlines_to_html(html, b->inline_content);
- gh_buf_printf(html, "\n", b->attributes.header_level);
+ strbuf_printf(html, "\n", b->attributes.header_level);
break;
case indented_code:
case fenced_code:
cr(html);
- gh_buf_puts(html, "tag == fenced_code) {
- gh_buf *info = &b->attributes.fenced_code_data.info;
+ strbuf *info = &b->attributes.fenced_code_data.info;
- if (gh_buf_len(info) > 0) {
- int first_tag = gh_buf_strchr(info, ' ', 0);
+ if (strbuf_len(info) > 0) {
+ int first_tag = strbuf_strchr(info, ' ', 0);
if (first_tag < 0)
- first_tag = gh_buf_len(info);
+ first_tag = strbuf_len(info);
- gh_buf_puts(html, " class=\"");
+ strbuf_puts(html, " class=\"");
escape_html(html, info->ptr, first_tag);
- gh_buf_putc(html, '"');
+ strbuf_putc(html, '"');
}
}
- gh_buf_puts(html, ">");
+ strbuf_puts(html, ">");
escape_html(html, b->string_content.ptr, b->string_content.size);
- gh_buf_puts(html, "
\n");
+ strbuf_puts(html, "\n");
break;
case html_block:
- gh_buf_put(html, b->string_content.ptr, b->string_content.size);
+ strbuf_put(html, b->string_content.ptr, b->string_content.size);
break;
case hrule:
- gh_buf_puts(html, "
\n");
+ strbuf_puts(html, "
\n");
break;
case reference_def:
@@ -141,9 +141,9 @@ void blocks_to_html(gh_buf *html, block *b, bool tight)
}
// Convert an inline list to HTML. Returns 0 on success, and sets result.
-void inlines_to_html(gh_buf *html, inl* ils)
+void inlines_to_html(strbuf *html, inl* ils)
{
- gh_buf scrap = GH_BUF_INIT;
+ strbuf scrap = GH_BUF_INIT;
while(ils != NULL) {
switch(ils->tag) {
@@ -152,70 +152,70 @@ void inlines_to_html(gh_buf *html, inl* ils)
break;
case INL_LINEBREAK:
- gh_buf_puts(html, "
\n");
+ strbuf_puts(html, "
\n");
break;
case INL_SOFTBREAK:
- gh_buf_putc(html, '\n');
+ strbuf_putc(html, '\n');
break;
case INL_CODE:
- gh_buf_puts(html, "");
+ strbuf_puts(html, "");
escape_html(html, ils->content.literal.data, ils->content.literal.len);
- gh_buf_puts(html, "
");
+ strbuf_puts(html, "
");
break;
case INL_RAW_HTML:
case INL_ENTITY:
- gh_buf_put(html,
+ strbuf_put(html,
ils->content.literal.data,
ils->content.literal.len);
break;
case INL_LINK:
- gh_buf_puts(html, "content.linkable.url)
escape_href(html, ils->content.linkable.url, -1);
if (ils->content.linkable.title) {
- gh_buf_puts(html, "\" title=\"");
+ strbuf_puts(html, "\" title=\"");
escape_html(html, ils->content.linkable.title, -1);
}
- gh_buf_puts(html, "\">");
+ strbuf_puts(html, "\">");
inlines_to_html(html, ils->content.inlines);
- gh_buf_puts(html, "");
+ strbuf_puts(html, "");
break;
case INL_IMAGE:
- gh_buf_puts(html, "
content.linkable.url)
escape_href(html, ils->content.linkable.url, -1);
inlines_to_html(&scrap, ils->content.inlines);
- gh_buf_puts(html, "\" alt=\"");
+ strbuf_puts(html, "\" alt=\"");
if (scrap.size)
escape_html(html, scrap.ptr, scrap.size);
- gh_buf_clear(&scrap);
+ strbuf_clear(&scrap);
if (ils->content.linkable.title) {
- gh_buf_puts(html, "\" title=\"");
+ strbuf_puts(html, "\" title=\"");
escape_html(html, ils->content.linkable.title, -1);
}
- gh_buf_puts(html, "\"/>");
+ strbuf_puts(html, "\"/>");
break;
case INL_STRONG:
- gh_buf_puts(html, "");
+ strbuf_puts(html, "");
inlines_to_html(html, ils->content.inlines);
- gh_buf_puts(html, "");
+ strbuf_puts(html, "");
break;
case INL_EMPH:
- gh_buf_puts(html, "");
+ strbuf_puts(html, "");
inlines_to_html(html, ils->content.inlines);
- gh_buf_puts(html, "");
+ strbuf_puts(html, "");
break;
}
ils = ils->next;
diff --git a/src/inlines.c b/src/inlines.c
index 8e2e683..33973df 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -25,7 +25,7 @@ inline static void chunk_free(chunk *c);
inline static void chunk_trim(chunk *c);
inline static chunk chunk_literal(const char *data);
-inline static chunk chunk_buf_detach(gh_buf *buf);
+inline static chunk chunk_buf_detach(strbuf *buf);
inline static chunk chunk_dup(const chunk *ch, int pos, int len);
static inl *parse_chunk_inlines(chunk *chunk, reference** refmap);
@@ -33,10 +33,10 @@ static inl *parse_inlines_while(subject* subj, int (*f)(subject*));
static int parse_inline(subject* subj, inl ** last);
static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap);
-static void subject_from_buf(subject *e, gh_buf *buffer, reference** refmap);
+static void subject_from_buf(subject *e, strbuf *buffer, reference** refmap);
static int subject_find_special_char(subject *subj);
-static void normalize_whitespace(gh_buf *s);
+static void normalize_whitespace(strbuf *s);
extern void free_reference(reference *ref) {
free(ref->label);
@@ -62,13 +62,13 @@ extern void free_reference_map(reference **refmap) {
// remove leading/trailing whitespace, case fold
static unsigned char *normalize_reference(chunk *ref)
{
- gh_buf normalized = GH_BUF_INIT;
+ strbuf normalized = GH_BUF_INIT;
utf8proc_case_fold(&normalized, ref->data, ref->len);
- gh_buf_trim(&normalized);
+ strbuf_trim(&normalized);
normalize_whitespace(&normalized);
- return gh_buf_detach(&normalized);
+ return strbuf_detach(&normalized);
}
// Returns reference if refmap contains a reference with matching
@@ -218,7 +218,7 @@ inline static inl* append_inlines(inl* a, inl* b)
return a;
}
-static void subject_from_buf(subject *e, gh_buf *buffer, reference** refmap)
+static void subject_from_buf(subject *e, strbuf *buffer, reference** refmap)
{
e->input.data = buffer->ptr;
e->input.len = buffer->size;
@@ -309,7 +309,7 @@ static int scan_to_closing_backticks(subject* subj, int openticklength)
// Destructively modify string, collapsing consecutive
// space and newline characters into a single space.
-static void normalize_whitespace(gh_buf *s)
+static void normalize_whitespace(strbuf *s)
{
bool last_char_was_space = false;
int r, w;
@@ -331,7 +331,7 @@ static void normalize_whitespace(gh_buf *s)
}
}
- gh_buf_truncate(s, w);
+ strbuf_truncate(s, w);
}
// Parse backtick code section or raw backticks, return an inline.
@@ -346,10 +346,10 @@ static inl* handle_backticks(subject *subj)
subj->pos = startpos; // rewind
return make_str(openticks);
} else {
- gh_buf buf = GH_BUF_INIT;
+ strbuf buf = GH_BUF_INIT;
- gh_buf_set(&buf, subj->input.data + startpos, endpos - startpos - openticks.len);
- gh_buf_trim(&buf);
+ strbuf_set(&buf, subj->input.data + startpos, endpos - startpos - openticks.len);
+ strbuf_trim(&buf);
normalize_whitespace(&buf);
return make_code(chunk_buf_detach(&buf));
@@ -569,7 +569,7 @@ static inl *make_str_with_entities(chunk *content)
}
// Destructively unescape a string: remove backslashes before punctuation chars.
-extern void unescape_buffer(gh_buf *buf)
+extern void unescape_buffer(strbuf *buf)
{
int r, w;
@@ -580,14 +580,14 @@ extern void unescape_buffer(gh_buf *buf)
buf->ptr[w++] = buf->ptr[r];
}
- gh_buf_truncate(buf, w);
+ strbuf_truncate(buf, w);
}
// Clean a URL: remove surrounding whitespace and surrounding <>,
// and remove \ that escape punctuation.
static unsigned char *clean_url(chunk *url, int is_email)
{
- gh_buf buf = GH_BUF_INIT;
+ strbuf buf = GH_BUF_INIT;
chunk_trim(url);
@@ -595,22 +595,22 @@ static unsigned char *clean_url(chunk *url, int is_email)
return NULL;
if (is_email)
- gh_buf_puts(&buf, "mailto:");
+ strbuf_puts(&buf, "mailto:");
if (url->data[0] == '<' && url->data[url->len - 1] == '>') {
- gh_buf_put(&buf, url->data + 1, url->len - 2);
+ strbuf_put(&buf, url->data + 1, url->len - 2);
} else {
- gh_buf_put(&buf, url->data, url->len);
+ strbuf_put(&buf, url->data, url->len);
}
unescape_buffer(&buf);
- return gh_buf_detach(&buf);
+ return strbuf_detach(&buf);
}
// Clean a title: remove surrounding quotes and remove \ that escape punctuation.
static unsigned char *clean_title(chunk *title)
{
- gh_buf buf = GH_BUF_INIT;
+ strbuf buf = GH_BUF_INIT;
unsigned char first, last;
if (title->len == 0)
@@ -623,13 +623,13 @@ static unsigned char *clean_title(chunk *title)
if ((first == '\'' && last == '\'') ||
(first == '(' && last == ')') ||
(first == '"' && last == '"')) {
- gh_buf_set(&buf, title->data + 1, title->len - 2);
+ strbuf_set(&buf, title->data + 1, title->len - 2);
} else {
- gh_buf_set(&buf, title->data, title->len);
+ strbuf_set(&buf, title->data, title->len);
}
unescape_buffer(&buf);
- return gh_buf_detach(&buf);
+ return strbuf_detach(&buf);
}
// Parse an autolink or HTML tag.
@@ -971,7 +971,7 @@ static int parse_inline(subject* subj, inl ** last)
return 1;
}
-extern inl* parse_inlines(gh_buf *input, reference** refmap)
+extern inl* parse_inlines(strbuf *input, reference** refmap)
{
subject subj;
subject_from_buf(&subj, input, refmap);
@@ -993,7 +993,7 @@ void spnl(subject* subj)
// Modify refmap if a reference is encountered.
// Return 0 if no reference found, otherwise position of subject
// after reference is parsed.
-extern int parse_reference(gh_buf *input, reference** refmap)
+extern int parse_reference(strbuf *input, reference** refmap)
{
subject subj;
diff --git a/src/main.c b/src/main.c
index e1abedc..7cf67e2 100644
--- a/src/main.c
+++ b/src/main.c
@@ -14,14 +14,14 @@ void print_usage()
static void print_document(block *document, bool ast)
{
- gh_buf html = GH_BUF_INIT;
+ strbuf html = GH_BUF_INIT;
if (ast) {
print_blocks(document, 0);
} else {
blocks_to_html(&html, document, false);
printf("%s", html.ptr);
- gh_buf_free(&html);
+ strbuf_free(&html);
}
}
diff --git a/src/stmd.h b/src/stmd.h
index 4a3c399..2e86f3a 100644
--- a/src/stmd.h
+++ b/src/stmd.h
@@ -50,7 +50,7 @@ struct FencedCodeData {
int fence_length;
int fence_offset;
char fence_char;
- gh_buf info;
+ strbuf info;
};
typedef struct Block {
@@ -76,7 +76,7 @@ typedef struct Block {
struct Block* last_child;
struct Block* parent;
struct Block* top;
- gh_buf string_content;
+ strbuf string_content;
inl* inline_content;
union {
struct ListData list_data;
@@ -88,15 +88,15 @@ typedef struct Block {
struct Block * prev;
} block;
-inl* parse_inlines(gh_buf *input, reference** refmap);
+inl* parse_inlines(strbuf *input, reference** refmap);
void free_inlines(inl* e);
-int parse_reference(gh_buf *input, reference** refmap);
+int parse_reference(strbuf *input, reference** refmap);
void free_reference(reference *ref);
void free_reference_map(reference **refmap);
void add_reference(reference** refmap, reference* ref);
-void unescape_buffer(gh_buf *buf);
+void unescape_buffer(strbuf *buf);
extern block* make_document();
extern block* add_child(block* parent,
@@ -109,9 +109,9 @@ extern block *stmd_parse_file(FILE *f);
void print_inlines(inl* ils, int indent);
void print_blocks(block* blk, int indent);
-void blocks_to_html(gh_buf *html, block *b, bool tight);
-void inlines_to_html(gh_buf *html, inl *b);
+void blocks_to_html(strbuf *html, block *b, bool tight);
+void inlines_to_html(strbuf *html, inl *b);
-void utf8proc_case_fold(gh_buf *dest, const unsigned char *str, int len);
+void utf8proc_case_fold(strbuf *dest, const unsigned char *str, int len);
#endif
diff --git a/src/utf8.c b/src/utf8.c
index 32c78a4..cebd872 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -84,7 +84,7 @@ ssize_t utf8proc_iterate(const uint8_t *str, ssize_t str_len, int32_t *dst)
return length;
}
-void utf8proc_encode_char(int32_t uc, gh_buf *buf)
+void utf8proc_encode_char(int32_t uc, strbuf *buf)
{
unsigned char dst[4];
int len = 0;
@@ -119,10 +119,10 @@ void utf8proc_encode_char(int32_t uc, gh_buf *buf)
assert(false);
}
- gh_buf_put(buf, dst, len);
+ strbuf_put(buf, dst, len);
}
-void utf8proc_case_fold(gh_buf *dest, const unsigned char *str, int len)
+void utf8proc_case_fold(strbuf *dest, const unsigned char *str, int len)
{
int32_t c;
--
cgit v1.2.3
From 94a79a605f3e76a43f1f87a5044f6761b99e5ca5 Mon Sep 17 00:00:00 2001
From: Vicent Marti
Date: Wed, 10 Sep 2014 18:33:27 +0200
Subject: Cleanup reference implementation
---
Makefile | 2 +-
src/blocks.c | 16 ++---
src/buffer.c | 43 ++++++++++++++
src/buffer.h | 2 +
src/inlines.c | 176 +++++++------------------------------------------------
src/references.c | 109 ++++++++++++++++++++++++++++++++++
src/references.h | 27 +++++++++
src/stmd.h | 26 +++-----
src/utf8.c | 10 ++--
src/utf8.h | 5 +-
10 files changed, 225 insertions(+), 191 deletions(-)
create mode 100644 src/references.c
create mode 100644 src/references.h
(limited to 'src/buffer.h')
diff --git a/Makefile b/Makefile
index 5d13272..11e2141 100644
--- a/Makefile
+++ b/Makefile
@@ -42,7 +42,7 @@ benchjs:
node js/bench.js ${BENCHINP}
HTML_OBJ=$(SRCDIR)/html/html.o $(SRCDIR)/html/houdini_href_e.o $(SRCDIR)/html/houdini_html_e.o $(SRCDIR)/html/houdini_html_u.o
-STMD_OBJ=$(SRCDIR)/inlines.o $(SRCDIR)/buffer.o $(SRCDIR)/blocks.o $(SRCDIR)/scanners.c $(SRCDIR)/print.o $(SRCDIR)/utf8.o
+STMD_OBJ=$(SRCDIR)/inlines.o $(SRCDIR)/buffer.o $(SRCDIR)/blocks.o $(SRCDIR)/scanners.c $(SRCDIR)/print.o $(SRCDIR)/utf8.o $(SRCDIR)/references.c
$(PROG): $(SRCDIR)/html/html_unescape.h $(SRCDIR)/case_fold_switch.inc $(HTML_OBJ) $(STMD_OBJ) $(SRCDIR)/main.c
$(CC) $(LDFLAGS) -o $@ $(HTML_OBJ) $(STMD_OBJ) $(SRCDIR)/main.c
diff --git a/src/blocks.c b/src/blocks.c
index 72b2dc2..30a8284 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -8,7 +8,6 @@
#include "utf8.h"
#include "html/houdini.h"
#include "scanners.h"
-#include "uthash.h"
#define peek_at(i, n) (i)->data[n]
@@ -36,12 +35,7 @@ static node_block* make_block(int tag, int start_line, int start_column)
extern node_block* make_document()
{
node_block *e = make_block(BLOCK_DOCUMENT, 1, 1);
- reference *map = NULL;
- reference ** refmap;
-
- refmap = (reference**) malloc(sizeof(reference*));
- *refmap = map;
- e->as.document.refmap = refmap;
+ e->as.document.refmap = reference_map_new();
e->top = e;
return e;
@@ -164,7 +158,7 @@ static void finalize(node_block* b, int line_number)
case BLOCK_PARAGRAPH:
pos = 0;
while (strbuf_at(&b->string_content, 0) == '[' &&
- (pos = parse_reference(&b->string_content, b->top->as.document.refmap))) {
+ (pos = parse_reference_inline(&b->string_content, b->top->as.document.refmap))) {
strbuf_drop(&b->string_content, pos);
}
@@ -192,7 +186,7 @@ static void finalize(node_block* b, int line_number)
strbuf_drop(&b->string_content, firstlinelen + 1);
strbuf_trim(&b->as.code.info);
- unescape_buffer(&b->as.code.info);
+ strbuf_unescape(&b->as.code.info);
break;
case BLOCK_LIST: // determine tight/loose status
@@ -268,7 +262,7 @@ extern void free_blocks(node_block* e)
if (e->tag == BLOCK_FENCED_CODE) {
strbuf_free(&e->as.code.info);
} else if (e->tag == BLOCK_DOCUMENT) {
- free_reference_map(e->as.document.refmap);
+ reference_map_free(e->as.document.refmap);
}
free_blocks(e->children);
free(e);
@@ -278,7 +272,7 @@ extern void free_blocks(node_block* e)
// Walk through node_block and all children, recursively, parsing
// string content into inline content where appropriate.
-void process_inlines(node_block* cur, reference** refmap)
+void process_inlines(node_block* cur, reference_map *refmap)
{
switch (cur->tag) {
case BLOCK_PARAGRAPH:
diff --git a/src/buffer.c b/src/buffer.c
index 90c2186..cdf8ca0 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -308,3 +308,46 @@ void strbuf_trim(strbuf *buf)
buf->ptr[buf->size] = '\0';
}
+
+// Destructively modify string, collapsing consecutive
+// space and newline characters into a single space.
+void strbuf_normalize_whitespace(strbuf *s)
+{
+ bool last_char_was_space = false;
+ int r, w;
+
+ for (r = 0, w = 0; r < s->size; ++r) {
+ switch (s->ptr[r]) {
+ case ' ':
+ case '\n':
+ if (last_char_was_space)
+ break;
+
+ s->ptr[w++] = ' ';
+ last_char_was_space = true;
+ break;
+
+ default:
+ s->ptr[w++] = s->ptr[r];
+ last_char_was_space = false;
+ }
+ }
+
+ strbuf_truncate(s, w);
+}
+
+// Destructively unescape a string: remove backslashes before punctuation chars.
+extern void strbuf_unescape(strbuf *buf)
+{
+ int r, w;
+
+ for (r = 0, w = 0; r < buf->size; ++r) {
+ if (buf->ptr[r] == '\\' && ispunct(buf->ptr[r + 1]))
+ continue;
+
+ buf->ptr[w++] = buf->ptr[r];
+ }
+
+ strbuf_truncate(buf, w);
+}
+
diff --git a/src/buffer.h b/src/buffer.h
index 6f45cbb..1bc1eee 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -108,5 +108,7 @@ int strbuf_strrchr(const strbuf *buf, int c, int pos);
void strbuf_drop(strbuf *buf, int n);
void strbuf_truncate(strbuf *buf, int len);
void strbuf_trim(strbuf *buf);
+void strbuf_normalize_whitespace(strbuf *s);
+void strbuf_unescape(strbuf *s);
#endif
diff --git a/src/inlines.c b/src/inlines.c
index aa0e13e..3040f09 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -7,110 +7,23 @@
#include "stmd.h"
#include "html/houdini.h"
#include "utf8.h"
-#include "uthash.h"
#include "scanners.h"
typedef struct Subject {
chunk input;
int pos;
- int label_nestlevel;
- reference** reference_map;
+ int label_nestlevel;
+ reference_map *refmap;
} subject;
-reference* lookup_reference(reference** refmap, chunk *label);
-reference* make_reference(chunk *label, chunk *url, chunk *title);
-
-static unsigned char *clean_url(chunk *url);
-static unsigned char *clean_title(chunk *title);
-static unsigned char *clean_autolink(chunk *url, int is_email);
-
-inline static void chunk_free(chunk *c);
-inline static void chunk_trim(chunk *c);
-
-inline static chunk chunk_literal(const char *data);
-inline static chunk chunk_buf_detach(strbuf *buf);
-inline static chunk chunk_dup(const chunk *ch, int pos, int len);
-
-static node_inl *parse_chunk_inlines(chunk *chunk, reference** refmap);
+static node_inl *parse_chunk_inlines(chunk *chunk, reference_map *refmap);
static node_inl *parse_inlines_while(subject* subj, int (*f)(subject*));
static int parse_inline(subject* subj, node_inl ** last);
-static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap);
-static void subject_from_buf(subject *e, strbuf *buffer, reference** refmap);
+static void subject_from_chunk(subject *e, chunk *chunk, reference_map *refmap);
+static void subject_from_buf(subject *e, strbuf *buffer, reference_map *refmap);
static int subject_find_special_char(subject *subj);
-static void normalize_whitespace(strbuf *s);
-
-extern void free_reference(reference *ref) {
- free(ref->label);
- free(ref->url);
- free(ref->title);
- free(ref);
-}
-
-extern void free_reference_map(reference **refmap) {
- /* free the hash table contents */
- reference *s;
- reference *tmp;
- if (refmap != NULL) {
- HASH_ITER(hh, *refmap, s, tmp) {
- HASH_DEL(*refmap, s);
- free_reference(s);
- }
- free(refmap);
- }
-}
-
-// normalize reference: collapse internal whitespace to single space,
-// remove leading/trailing whitespace, case fold
-static unsigned char *normalize_reference(chunk *ref)
-{
- strbuf normalized = GH_BUF_INIT;
-
- utf8proc_case_fold(&normalized, ref->data, ref->len);
- strbuf_trim(&normalized);
- normalize_whitespace(&normalized);
-
- return strbuf_detach(&normalized);
-}
-
-// Returns reference if refmap contains a reference with matching
-// label, otherwise NULL.
-extern reference* lookup_reference(reference** refmap, chunk *label)
-{
- reference *ref = NULL;
- unsigned char *norm = normalize_reference(label);
- if (refmap != NULL) {
- HASH_FIND_STR(*refmap, (char*)norm, ref);
- }
- free(norm);
- return ref;
-}
-
-extern reference* make_reference(chunk *label, chunk *url, chunk *title)
-{
- reference *ref;
- ref = malloc(sizeof(reference));
- ref->label = normalize_reference(label);
- ref->url = clean_url(url);
- ref->title = clean_title(title);
- return ref;
-}
-
-extern void add_reference(reference** refmap, reference* ref)
-{
- reference * t = NULL;
- const char *label = (const char *)ref->label;
-
- HASH_FIND(hh, *refmap, label, strlen(label), t);
-
- if (t == NULL) {
- HASH_ADD_KEYPTR(hh, *refmap, label, strlen(label), ref);
- } else {
- free_reference(ref); // we free this now since it won't be in the refmap
- }
-}
-
static unsigned char *bufdup(const unsigned char *buf)
{
unsigned char *new = NULL;
@@ -236,26 +149,26 @@ inline static node_inl* append_inlines(node_inl* a, node_inl* b)
return a;
}
-static void subject_from_buf(subject *e, strbuf *buffer, reference** refmap)
+static void subject_from_buf(subject *e, strbuf *buffer, reference_map *refmap)
{
e->input.data = buffer->ptr;
e->input.len = buffer->size;
e->input.alloc = 0;
e->pos = 0;
e->label_nestlevel = 0;
- e->reference_map = refmap;
+ e->refmap = refmap;
chunk_rtrim(&e->input);
}
-static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap)
+static void subject_from_chunk(subject *e, chunk *chunk, reference_map *refmap)
{
e->input.data = chunk->data;
e->input.len = chunk->len;
e->input.alloc = 0;
e->pos = 0;
e->label_nestlevel = 0;
- e->reference_map = refmap;
+ e->refmap = refmap;
chunk_rtrim(&e->input);
}
@@ -325,33 +238,6 @@ static int scan_to_closing_backticks(subject* subj, int openticklength)
return (subj->pos);
}
-// Destructively modify string, collapsing consecutive
-// space and newline characters into a single space.
-static void normalize_whitespace(strbuf *s)
-{
- bool last_char_was_space = false;
- int r, w;
-
- for (r = 0, w = 0; r < s->size; ++r) {
- switch (s->ptr[r]) {
- case ' ':
- case '\n':
- if (last_char_was_space)
- break;
-
- s->ptr[w++] = ' ';
- last_char_was_space = true;
- break;
-
- default:
- s->ptr[w++] = s->ptr[r];
- last_char_was_space = false;
- }
- }
-
- strbuf_truncate(s, w);
-}
-
// Parse backtick code section or raw backticks, return an inline.
// Assumes that the subject has a backtick at the current position.
static node_inl* handle_backticks(subject *subj)
@@ -368,7 +254,7 @@ static node_inl* handle_backticks(subject *subj)
strbuf_set(&buf, subj->input.data + startpos, endpos - startpos - openticks.len);
strbuf_trim(&buf);
- normalize_whitespace(&buf);
+ strbuf_normalize_whitespace(&buf);
return make_code(chunk_buf_detach(&buf));
}
@@ -575,24 +461,9 @@ static node_inl *make_str_with_entities(chunk *content)
}
}
-// Destructively unescape a string: remove backslashes before punctuation chars.
-extern void unescape_buffer(strbuf *buf)
-{
- int r, w;
-
- for (r = 0, w = 0; r < buf->size; ++r) {
- if (buf->ptr[r] == '\\' && ispunct(buf->ptr[r + 1]))
- continue;
-
- buf->ptr[w++] = buf->ptr[r];
- }
-
- strbuf_truncate(buf, w);
-}
-
// Clean a URL: remove surrounding whitespace and surrounding <>,
// and remove \ that escape punctuation.
-static unsigned char *clean_url(chunk *url)
+unsigned char *clean_url(chunk *url)
{
strbuf buf = GH_BUF_INIT;
@@ -607,11 +478,11 @@ static unsigned char *clean_url(chunk *url)
houdini_unescape_html_f(&buf, url->data, url->len);
}
- unescape_buffer(&buf);
+ strbuf_unescape(&buf);
return strbuf_detach(&buf);
}
-static unsigned char *clean_autolink(chunk *url, int is_email)
+unsigned char *clean_autolink(chunk *url, int is_email)
{
strbuf buf = GH_BUF_INIT;
@@ -628,7 +499,7 @@ static unsigned char *clean_autolink(chunk *url, int is_email)
}
// Clean a title: remove surrounding quotes and remove \ that escape punctuation.
-static unsigned char *clean_title(chunk *title)
+unsigned char *clean_title(chunk *title)
{
strbuf buf = GH_BUF_INIT;
unsigned char first, last;
@@ -648,7 +519,7 @@ static unsigned char *clean_title(chunk *title)
houdini_unescape_html_f(&buf, title->data, title->len);
}
- unescape_buffer(&buf);
+ strbuf_unescape(&buf);
return strbuf_detach(&buf);
}
@@ -810,7 +681,7 @@ static node_inl* handle_left_bracket(subject* subj)
} else {
// if we get here, we matched a label but didn't get further:
subj->pos = endlabel;
- lab = parse_chunk_inlines(&rawlabel, subj->reference_map);
+ lab = parse_chunk_inlines(&rawlabel, subj->refmap);
result = append_inlines(make_str(chunk_literal("[")),
append_inlines(lab,
make_str(chunk_literal("]"))));
@@ -834,13 +705,13 @@ static node_inl* handle_left_bracket(subject* subj)
}
// lookup rawlabel in subject->reference_map:
- ref = lookup_reference(subj->reference_map, &reflabel);
+ ref = reference_lookup(subj->refmap, &reflabel);
if (ref != NULL) { // found
lab = parse_chunk_inlines(&rawlabel, NULL);
result = make_ref_link(lab, ref);
} else {
subj->pos = endlabel;
- lab = parse_chunk_inlines(&rawlabel, subj->reference_map);
+ lab = parse_chunk_inlines(&rawlabel, subj->refmap);
result = append_inlines(make_str(chunk_literal("[")),
append_inlines(lab, make_str(chunk_literal("]"))));
}
@@ -887,7 +758,7 @@ extern node_inl* parse_inlines_while(subject* subj, int (*f)(subject*))
return result;
}
-node_inl *parse_chunk_inlines(chunk *chunk, reference** refmap)
+node_inl *parse_chunk_inlines(chunk *chunk, reference_map *refmap)
{
subject subj;
subject_from_chunk(&subj, chunk, refmap);
@@ -987,7 +858,7 @@ static int parse_inline(subject* subj, node_inl ** last)
return 1;
}
-extern node_inl* parse_inlines(strbuf *input, reference** refmap)
+extern node_inl* parse_inlines(strbuf *input, reference_map *refmap)
{
subject subj;
subject_from_buf(&subj, input, refmap);
@@ -1009,7 +880,7 @@ void spnl(subject* subj)
// Modify refmap if a reference is encountered.
// Return 0 if no reference found, otherwise position of subject
// after reference is parsed.
-extern int parse_reference(strbuf *input, reference** refmap)
+int parse_reference_inline(strbuf *input, reference_map *refmap)
{
subject subj;
@@ -1019,7 +890,6 @@ extern int parse_reference(strbuf *input, reference** refmap)
int matchlen = 0;
int beforetitle;
- reference *new = NULL;
subject_from_buf(&subj, input, NULL);
@@ -1065,9 +935,7 @@ extern int parse_reference(strbuf *input, reference** refmap)
return 0;
}
// insert reference into refmap
- new = make_reference(&lab, &url, &title);
- add_reference(refmap, new);
-
+ reference_create(refmap, &lab, &url, &title);
return subj.pos;
}
diff --git a/src/references.c b/src/references.c
new file mode 100644
index 0000000..ff64b00
--- /dev/null
+++ b/src/references.c
@@ -0,0 +1,109 @@
+#include "stmd.h"
+#include "utf8.h"
+#include "references.h"
+
+static unsigned int
+refhash(const unsigned char *link_ref)
+{
+ unsigned int hash = 0;
+
+ while (*link_ref)
+ hash = (*link_ref++) + (hash << 6) + (hash << 16) - hash;
+
+ return hash;
+}
+
+// normalize reference: collapse internal whitespace to single space,
+// remove leading/trailing whitespace, case fold
+static unsigned char *normalize_reference(chunk *ref)
+{
+ strbuf normalized = GH_BUF_INIT;
+
+ utf8proc_case_fold(&normalized, ref->data, ref->len);
+ strbuf_trim(&normalized);
+ strbuf_normalize_whitespace(&normalized);
+
+ return strbuf_detach(&normalized);
+}
+
+static void add_reference(reference_map *map, reference* ref)
+{
+ ref->next = map->table[ref->hash % REFMAP_SIZE];
+ map->table[ref->hash % REFMAP_SIZE] = ref;
+}
+
+extern reference *reference_create(reference_map *map, chunk *label, chunk *url, chunk *title)
+{
+ reference *ref;
+ ref = malloc(sizeof(reference));
+ ref->label = normalize_reference(label);
+ ref->hash = refhash(ref->label);
+ ref->url = clean_url(url);
+ ref->title = clean_title(title);
+ ref->next = NULL;
+
+ add_reference(map, ref);
+
+ return ref;
+}
+
+// Returns reference if refmap contains a reference with matching
+// label, otherwise NULL.
+reference* reference_lookup(reference_map *map, chunk *label)
+{
+ reference *ref = NULL;
+ unsigned char *norm;
+ unsigned int hash;
+
+ if (map == NULL)
+ return NULL;
+
+ norm = normalize_reference(label);
+ hash = refhash(norm);
+ ref = map->table[hash % REFMAP_SIZE];
+
+ while (ref) {
+ if (ref->label[0] == norm[0] &&
+ !strcmp((char *)ref->label, (char *)norm))
+ break;
+ ref = ref->next;
+ }
+
+ free(norm);
+ return ref;
+}
+
+static void reference_free(reference *ref)
+{
+ free(ref->label);
+ free(ref->url);
+ free(ref->title);
+ free(ref);
+}
+
+void reference_map_free(reference_map *map)
+{
+ unsigned int i;
+
+ for (i = 0; i < REFMAP_SIZE; ++i) {
+ reference *ref = map->table[i];
+ reference *next;
+
+ while (ref) {
+ next = ref->next;
+ reference_free(ref);
+ ref = next;
+ }
+ }
+
+ free(map->table);
+ free(map);
+}
+
+reference_map *reference_map_new(void)
+{
+ reference_map *map = malloc(sizeof(reference_map));
+ memset(map, 0x0, sizeof(reference_map));
+ return map;
+}
+
diff --git a/src/references.h b/src/references.h
new file mode 100644
index 0000000..78fffe7
--- /dev/null
+++ b/src/references.h
@@ -0,0 +1,27 @@
+#ifndef _REFERENCES_H_
+#define _REFERENCES_H_
+
+#define REFMAP_SIZE 16
+
+struct reference {
+ struct reference *next;
+ unsigned char *label;
+ unsigned char *url;
+ unsigned char *title;
+ unsigned int hash;
+};
+
+typedef struct reference reference;
+
+struct reference_map {
+ reference *table[REFMAP_SIZE];
+};
+
+typedef struct reference_map reference_map;
+
+reference_map *reference_map_new(void);
+void reference_map_free(reference_map *map);
+reference* reference_lookup(reference_map *map, chunk *label);
+extern reference *reference_create(reference_map *map, chunk *label, chunk *url, chunk *title);
+
+#endif
diff --git a/src/stmd.h b/src/stmd.h
index 21a86b0..4e21e6c 100644
--- a/src/stmd.h
+++ b/src/stmd.h
@@ -5,7 +5,7 @@
#include
#include "buffer.h"
#include "chunk.h"
-#include "uthash.h"
+#include "references.h"
#define VERSION "0.1"
#define CODE_INDENT 4
@@ -36,17 +36,7 @@ struct node_inl {
typedef struct node_inl node_inl;
-struct reference {
- unsigned char *label;
- unsigned char *url;
- unsigned char *title;
- UT_hash_handle hh; // used by uthash
-};
-
-typedef struct reference reference;
-
// Types for blocks
-
struct ListData {
enum {
bullet,
@@ -104,7 +94,7 @@ struct node_block {
int level;
} header;
struct {
- reference** refmap;
+ reference_map *refmap;
} document;
} as;
@@ -114,14 +104,10 @@ struct node_block {
typedef struct node_block node_block;
-node_inl* parse_inlines(strbuf *input, reference** refmap);
+node_inl* parse_inlines(strbuf *input, reference_map *refmap);
void free_inlines(node_inl* e);
-int parse_reference(strbuf *input, reference** refmap);
-void free_reference(reference *ref);
-void free_reference_map(reference **refmap);
-
-void add_reference(reference** refmap, reference* ref);
+int parse_reference_inline(strbuf *input, reference_map *refmap);
void unescape_buffer(strbuf *buf);
extern node_block* make_document();
@@ -138,4 +124,8 @@ void print_blocks(node_block* blk, int indent);
void blocks_to_html(strbuf *html, node_block *b, bool tight);
void inlines_to_html(strbuf *html, node_inl *b);
+unsigned char *clean_url(chunk *url);
+unsigned char *clean_autolink(chunk *url, int is_email);
+unsigned char *clean_title(chunk *title);
+
#endif
diff --git a/src/utf8.c b/src/utf8.c
index 12d7ba5..c65aec6 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -25,7 +25,7 @@ static const int8_t utf8proc_utf8class[256] = {
static void encode_unknown(strbuf *buf)
{
- static const unsigned char repl[] = {239, 191, 189};
+ static const uint8_t repl[] = {239, 191, 189};
strbuf_put(buf, repl, 3);
}
@@ -52,9 +52,9 @@ ssize_t utf8proc_charlen(const uint8_t *str, ssize_t str_len)
return length;
}
-void utf8proc_detab(strbuf *ob, const unsigned char *line, size_t size)
+void utf8proc_detab(strbuf *ob, const uint8_t *line, size_t size)
{
- static const unsigned char whitespace[] = " ";
+ static const uint8_t whitespace[] = " ";
size_t i = 0, tab = 0;
@@ -132,7 +132,7 @@ ssize_t utf8proc_iterate(const uint8_t *str, ssize_t str_len, int32_t *dst)
void utf8proc_encode_char(int32_t uc, strbuf *buf)
{
- unsigned char dst[4];
+ uint8_t dst[4];
int len = 0;
assert(uc >= 0);
@@ -169,7 +169,7 @@ void utf8proc_encode_char(int32_t uc, strbuf *buf)
strbuf_put(buf, dst, len);
}
-void utf8proc_case_fold(strbuf *dest, const unsigned char *str, int len)
+void utf8proc_case_fold(strbuf *dest, const uint8_t *str, int len)
{
int32_t c;
diff --git a/src/utf8.h b/src/utf8.h
index 1e4e556..9506b75 100644
--- a/src/utf8.h
+++ b/src/utf8.h
@@ -1,12 +1,13 @@
#ifndef _H_STMD_UTF8_
#define _H_STMD_UTF8_
+#include
#include "buffer.h"
-void utf8proc_case_fold(strbuf *dest, const unsigned char *str, int len);
+void utf8proc_case_fold(strbuf *dest, const uint8_t *str, int len);
void utf8proc_encode_char(int32_t uc, strbuf *buf);
ssize_t utf8proc_iterate(const uint8_t *str, ssize_t str_len, int32_t *dst);
ssize_t utf8proc_charlen(const uint8_t *str, ssize_t str_len);
-void utf8proc_detab(strbuf *dest, const unsigned char *line, size_t size);
+void utf8proc_detab(strbuf *dest, const uint8_t *line, size_t size);
#endif
--
cgit v1.2.3