From c28af79329264a7cf331a1b1c414919e4ed9e9f9 Mon Sep 17 00:00:00 2001
From: Vicent Marti
Date: Tue, 2 Sep 2014 13:37:34 +0200
Subject: It buiiiilds
---
src/html/houdini.h | 44 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 44 insertions(+)
create mode 100644 src/html/houdini.h
(limited to 'src/html/houdini.h')
diff --git a/src/html/houdini.h b/src/html/houdini.h
new file mode 100644
index 0000000..31fe917
--- /dev/null
+++ b/src/html/houdini.h
@@ -0,0 +1,44 @@
+#ifndef __HOUDINI_H__
+#define __HOUDINI_H__
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include
+#include "buffer.h"
+
+#define likely(x) __builtin_expect((x),1)
+#define unlikely(x) __builtin_expect((x),0)
+
+#ifdef HOUDINI_USE_LOCALE
+# define _isxdigit(c) isxdigit(c)
+# define _isdigit(c) isdigit(c)
+#else
+/*
+ * Helper _isdigit methods -- do not trust the current locale
+ * */
+# define _isxdigit(c) (strchr("0123456789ABCDEFabcdef", (c)) != NULL)
+# define _isdigit(c) ((c) >= '0' && (c) <= '9')
+#endif
+
+#define HOUDINI_ESCAPED_SIZE(x) (((x) * 12) / 10)
+#define HOUDINI_UNESCAPED_SIZE(x) (x)
+
+extern int houdini_escape_html(gh_buf *ob, const uint8_t *src, size_t size);
+extern int houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure);
+extern int houdini_unescape_html(gh_buf *ob, const uint8_t *src, size_t size);
+extern int houdini_escape_xml(gh_buf *ob, const uint8_t *src, size_t size);
+extern int houdini_escape_uri(gh_buf *ob, const uint8_t *src, size_t size);
+extern int houdini_escape_url(gh_buf *ob, const uint8_t *src, size_t size);
+extern int houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size);
+extern int houdini_unescape_uri(gh_buf *ob, const uint8_t *src, size_t size);
+extern int houdini_unescape_url(gh_buf *ob, const uint8_t *src, size_t size);
+extern int houdini_escape_js(gh_buf *ob, const uint8_t *src, size_t size);
+extern int houdini_unescape_js(gh_buf *ob, const uint8_t *src, size_t size);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--
cgit v1.2.3
From 543c2c94d71adee42c7bd2f8027d75c87ed8120d Mon Sep 17 00:00:00 2001
From: Vicent Marti
Date: Thu, 4 Sep 2014 18:38:14 +0200
Subject: Rename to strbuf
---
src/blocks.c | 64 +++++++++++++++----------------
src/buffer.c | 86 ++++++++++++++++++++---------------------
src/buffer.h | 80 +++++++++++++++++++-------------------
src/chunk.h | 4 +-
src/html/houdini.h | 22 +++++------
src/html/houdini_href_e.c | 12 +++---
src/html/houdini_html_e.c | 10 ++---
src/html/html.c | 98 +++++++++++++++++++++++------------------------
src/inlines.c | 50 ++++++++++++------------
src/main.c | 4 +-
src/stmd.h | 16 ++++----
src/utf8.c | 6 +--
12 files changed, 226 insertions(+), 226 deletions(-)
(limited to 'src/html/houdini.h')
diff --git a/src/blocks.c b/src/blocks.c
index cf0e9e4..9faccd9 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -10,7 +10,7 @@
#define peek_at(i, n) (i)->data[n]
-static void incorporate_line(gh_buf *ln, int line_number, block** curptr);
+static void incorporate_line(strbuf *ln, int line_number, block** curptr);
static void finalize(block* b, int line_number);
static block* make_block(int tag, int start_line, int start_column)
@@ -28,7 +28,7 @@ static block* make_block(int tag, int start_line, int start_column)
e->parent = NULL;
e->top = NULL;
e->attributes.refmap = NULL;
- gh_buf_init(&e->string_content, 32);
+ strbuf_init(&e->string_content, 32);
e->inline_content = NULL;
e->next = NULL;
e->prev = NULL;
@@ -49,7 +49,7 @@ extern block* make_document()
}
// Returns true if line has only space characters, else false.
-bool is_blank(gh_buf *s, int offset)
+bool is_blank(strbuf *s, int offset)
{
while (offset < s->size) {
switch (s->ptr[offset]) {
@@ -85,10 +85,10 @@ static inline bool accepts_lines(int block_type)
static void add_line(block* block, chunk *ch, int offset)
{
assert(block->open);
- gh_buf_put(&block->string_content, ch->data + offset, ch->len - offset);
+ strbuf_put(&block->string_content, ch->data + offset, ch->len - offset);
}
-static void remove_trailing_blank_lines(gh_buf *ln)
+static void remove_trailing_blank_lines(strbuf *ln)
{
int i;
@@ -100,13 +100,13 @@ static void remove_trailing_blank_lines(gh_buf *ln)
}
if (i < 0) {
- gh_buf_clear(ln);
+ strbuf_clear(ln);
return;
}
- i = gh_buf_strchr(ln, '\n', i);
+ i = strbuf_strchr(ln, '\n', i);
if (i >= 0)
- gh_buf_truncate(ln, i);
+ strbuf_truncate(ln, i);
}
// Check to see if a block ends with a blank line, descending
@@ -164,10 +164,10 @@ static void finalize(block* b, int line_number)
switch (b->tag) {
case paragraph:
pos = 0;
- while (gh_buf_at(&b->string_content, 0) == '[' &&
+ while (strbuf_at(&b->string_content, 0) == '[' &&
(pos = parse_reference(&b->string_content, b->top->attributes.refmap))) {
- gh_buf_drop(&b->string_content, pos);
+ strbuf_drop(&b->string_content, pos);
}
if (is_blank(&b->string_content, 0)) {
b->tag = reference_def;
@@ -176,23 +176,23 @@ static void finalize(block* b, int line_number)
case indented_code:
remove_trailing_blank_lines(&b->string_content);
- gh_buf_putc(&b->string_content, '\n');
+ strbuf_putc(&b->string_content, '\n');
break;
case fenced_code:
// first line of contents becomes info
- firstlinelen = gh_buf_strchr(&b->string_content, '\n', 0);
+ firstlinelen = strbuf_strchr(&b->string_content, '\n', 0);
- gh_buf_init(&b->attributes.fenced_code_data.info, 0);
- gh_buf_set(
+ strbuf_init(&b->attributes.fenced_code_data.info, 0);
+ strbuf_set(
&b->attributes.fenced_code_data.info,
b->string_content.ptr,
firstlinelen
);
- gh_buf_drop(&b->string_content, firstlinelen + 1);
+ strbuf_drop(&b->string_content, firstlinelen + 1);
- gh_buf_trim(&b->attributes.fenced_code_data.info);
+ strbuf_trim(&b->attributes.fenced_code_data.info);
unescape_buffer(&b->attributes.fenced_code_data.info);
break;
@@ -265,9 +265,9 @@ extern void free_blocks(block* e)
while (e != NULL) {
next = e->next;
free_inlines(e->inline_content);
- gh_buf_free(&e->string_content);
+ strbuf_free(&e->string_content);
if (e->tag == fenced_code) {
- gh_buf_free(&e->attributes.fenced_code_data.info);
+ strbuf_free(&e->attributes.fenced_code_data.info);
} else if (e->tag == document) {
free_reference_map(e->attributes.refmap);
}
@@ -287,7 +287,7 @@ void process_inlines(block* cur, reference** refmap)
case setext_header:
cur->inline_content = parse_inlines(&cur->string_content, refmap);
// MEM
- // gh_buf_free(&cur->string_content);
+ // strbuf_free(&cur->string_content);
break;
default:
@@ -369,7 +369,7 @@ static int lists_match(struct ListData list_data,
list_data.bullet_char == item_data.bullet_char);
}
-static void expand_tabs(gh_buf *ob, const unsigned char *line, size_t size)
+static void expand_tabs(strbuf *ob, const unsigned char *line, size_t size)
{
size_t i = 0, tab = 0;
@@ -381,13 +381,13 @@ static void expand_tabs(gh_buf *ob, const unsigned char *line, size_t size)
}
if (i > org)
- gh_buf_put(ob, line + org, i - org);
+ strbuf_put(ob, line + org, i - org);
if (i >= size)
break;
do {
- gh_buf_putc(ob, ' '); tab++;
+ strbuf_putc(ob, ' '); tab++;
} while (tab % 4);
i++;
@@ -409,7 +409,7 @@ static block *finalize_document(block *document, int linenum)
extern block *stmd_parse_file(FILE *f)
{
- gh_buf line = GH_BUF_INIT;
+ strbuf line = GH_BUF_INIT;
unsigned char buffer[4096];
int linenum = 1;
block *document = make_document();
@@ -417,17 +417,17 @@ extern block *stmd_parse_file(FILE *f)
while (fgets((char *)buffer, sizeof(buffer), f)) {
expand_tabs(&line, buffer, strlen((char *)buffer));
incorporate_line(&line, linenum, &document);
- gh_buf_clear(&line);
+ strbuf_clear(&line);
linenum++;
}
- gh_buf_free(&line);
+ strbuf_free(&line);
return finalize_document(document, linenum);
}
extern block *stmd_parse_document(const unsigned char *buffer, size_t len)
{
- gh_buf line = GH_BUF_INIT;
+ strbuf line = GH_BUF_INIT;
int linenum = 1;
const unsigned char *end = buffer + len;
block *document = make_document();
@@ -444,11 +444,11 @@ extern block *stmd_parse_document(const unsigned char *buffer, size_t len)
}
incorporate_line(&line, linenum, &document);
- gh_buf_clear(&line);
+ strbuf_clear(&line);
linenum++;
}
- gh_buf_free(&line);
+ strbuf_free(&line);
return finalize_document(document, linenum);
}
@@ -471,7 +471,7 @@ static void chop_trailing_hashtags(chunk *ch)
}
// Process one line at a time, modifying a block.
-static void incorporate_line(gh_buf *line, int line_number, block** curptr)
+static void incorporate_line(strbuf *line, int line_number, block** curptr)
{
block* last_matched_container;
int offset = 0;
@@ -639,8 +639,8 @@ static void incorporate_line(gh_buf *line, int line_number, block** curptr)
} else if (container->tag == paragraph &&
(lev = scan_setext_header_line(&input, first_nonspace)) &&
// check that there is only one line in the paragraph:
- gh_buf_strrchr(&container->string_content, '\n',
- gh_buf_len(&container->string_content) - 2) < 0) {
+ strbuf_strrchr(&container->string_content, '\n',
+ strbuf_len(&container->string_content) - 2) < 0) {
container->tag = setext_header;
container->attributes.header_level = lev;
@@ -734,7 +734,7 @@ static void incorporate_line(gh_buf *line, int line_number, block** curptr)
container == last_matched_container &&
!blank &&
cur->tag == paragraph &&
- gh_buf_len(&cur->string_content) > 0) {
+ strbuf_len(&cur->string_content) > 0) {
add_line(cur, &input, offset);
diff --git a/src/buffer.c b/src/buffer.c
index dc4a405..90c2186 100644
--- a/src/buffer.c
+++ b/src/buffer.c
@@ -9,32 +9,32 @@
#include "buffer.h"
-/* Used as default value for gh_buf->ptr so that people can always
- * assume ptr is non-NULL and zero terminated even for new gh_bufs.
+/* Used as default value for strbuf->ptr so that people can always
+ * assume ptr is non-NULL and zero terminated even for new strbufs.
*/
-unsigned char gh_buf__initbuf[1];
-unsigned char gh_buf__oom[1];
+unsigned char strbuf__initbuf[1];
+unsigned char strbuf__oom[1];
#define ENSURE_SIZE(b, d) \
- if ((d) > buf->asize && gh_buf_grow(b, (d)) < 0)\
+ if ((d) > buf->asize && strbuf_grow(b, (d)) < 0)\
return -1;
-void gh_buf_init(gh_buf *buf, int initial_size)
+void strbuf_init(strbuf *buf, int initial_size)
{
buf->asize = 0;
buf->size = 0;
- buf->ptr = gh_buf__initbuf;
+ buf->ptr = strbuf__initbuf;
if (initial_size)
- gh_buf_grow(buf, initial_size);
+ strbuf_grow(buf, initial_size);
}
-int gh_buf_try_grow(gh_buf *buf, int target_size, bool mark_oom)
+int strbuf_try_grow(strbuf *buf, int target_size, bool mark_oom)
{
unsigned char *new_ptr;
int new_size;
- if (buf->ptr == gh_buf__oom)
+ if (buf->ptr == strbuf__oom)
return -1;
if (target_size <= buf->asize)
@@ -60,7 +60,7 @@ int gh_buf_try_grow(gh_buf *buf, int target_size, bool mark_oom)
if (!new_ptr) {
if (mark_oom)
- buf->ptr = gh_buf__oom;
+ buf->ptr = strbuf__oom;
return -1;
}
@@ -75,17 +75,17 @@ int gh_buf_try_grow(gh_buf *buf, int target_size, bool mark_oom)
return 0;
}
-void gh_buf_free(gh_buf *buf)
+void strbuf_free(strbuf *buf)
{
if (!buf) return;
- if (buf->ptr != gh_buf__initbuf && buf->ptr != gh_buf__oom)
+ if (buf->ptr != strbuf__initbuf && buf->ptr != strbuf__oom)
free(buf->ptr);
- gh_buf_init(buf, 0);
+ strbuf_init(buf, 0);
}
-void gh_buf_clear(gh_buf *buf)
+void strbuf_clear(strbuf *buf)
{
buf->size = 0;
@@ -93,10 +93,10 @@ void gh_buf_clear(gh_buf *buf)
buf->ptr[0] = '\0';
}
-int gh_buf_set(gh_buf *buf, const unsigned char *data, int len)
+int strbuf_set(strbuf *buf, const unsigned char *data, int len)
{
if (len <= 0 || data == NULL) {
- gh_buf_clear(buf);
+ strbuf_clear(buf);
} else {
if (data != buf->ptr) {
ENSURE_SIZE(buf, len + 1);
@@ -108,14 +108,14 @@ int gh_buf_set(gh_buf *buf, const unsigned char *data, int len)
return 0;
}
-int gh_buf_sets(gh_buf *buf, const char *string)
+int strbuf_sets(strbuf *buf, const char *string)
{
- return gh_buf_set(buf,
+ return strbuf_set(buf,
(const unsigned char *)string,
string ? strlen(string) : 0);
}
-int gh_buf_putc(gh_buf *buf, int c)
+int strbuf_putc(strbuf *buf, int c)
{
ENSURE_SIZE(buf, buf->size + 2);
buf->ptr[buf->size++] = c;
@@ -123,7 +123,7 @@ int gh_buf_putc(gh_buf *buf, int c)
return 0;
}
-int gh_buf_put(gh_buf *buf, const unsigned char *data, int len)
+int strbuf_put(strbuf *buf, const unsigned char *data, int len)
{
if (len <= 0)
return 0;
@@ -135,12 +135,12 @@ int gh_buf_put(gh_buf *buf, const unsigned char *data, int len)
return 0;
}
-int gh_buf_puts(gh_buf *buf, const char *string)
+int strbuf_puts(strbuf *buf, const char *string)
{
- return gh_buf_put(buf, (const unsigned char *)string, strlen(string));
+ return strbuf_put(buf, (const unsigned char *)string, strlen(string));
}
-int gh_buf_vprintf(gh_buf *buf, const char *format, va_list ap)
+int strbuf_vprintf(strbuf *buf, const char *format, va_list ap)
{
const int expected_size = buf->size + (strlen(format) * 2);
int len;
@@ -159,7 +159,7 @@ int gh_buf_vprintf(gh_buf *buf, const char *format, va_list ap)
if (len < 0) {
free(buf->ptr);
- buf->ptr = gh_buf__oom;
+ buf->ptr = strbuf__oom;
return -1;
}
@@ -174,19 +174,19 @@ int gh_buf_vprintf(gh_buf *buf, const char *format, va_list ap)
return 0;
}
-int gh_buf_printf(gh_buf *buf, const char *format, ...)
+int strbuf_printf(strbuf *buf, const char *format, ...)
{
int r;
va_list ap;
va_start(ap, format);
- r = gh_buf_vprintf(buf, format, ap);
+ r = strbuf_vprintf(buf, format, ap);
va_end(ap);
return r;
}
-void gh_buf_copy_cstr(char *data, int datasize, const gh_buf *buf)
+void strbuf_copy_cstr(char *data, int datasize, const strbuf *buf)
{
int copylen;
@@ -204,28 +204,28 @@ void gh_buf_copy_cstr(char *data, int datasize, const gh_buf *buf)
data[copylen] = '\0';
}
-void gh_buf_swap(gh_buf *buf_a, gh_buf *buf_b)
+void strbuf_swap(strbuf *buf_a, strbuf *buf_b)
{
- gh_buf t = *buf_a;
+ strbuf t = *buf_a;
*buf_a = *buf_b;
*buf_b = t;
}
-unsigned char *gh_buf_detach(gh_buf *buf)
+unsigned char *strbuf_detach(strbuf *buf)
{
unsigned char *data = buf->ptr;
- if (buf->asize == 0 || buf->ptr == gh_buf__oom)
+ if (buf->asize == 0 || buf->ptr == strbuf__oom)
return NULL;
- gh_buf_init(buf, 0);
+ strbuf_init(buf, 0);
return data;
}
-void gh_buf_attach(gh_buf *buf, unsigned char *ptr, int asize)
+void strbuf_attach(strbuf *buf, unsigned char *ptr, int asize)
{
- gh_buf_free(buf);
+ strbuf_free(buf);
if (ptr) {
buf->ptr = ptr;
@@ -235,18 +235,18 @@ void gh_buf_attach(gh_buf *buf, unsigned char *ptr, int asize)
else /* pass 0 to fall back on strlen + 1 */
buf->asize = buf->size + 1;
} else {
- gh_buf_grow(buf, asize);
+ strbuf_grow(buf, asize);
}
}
-int gh_buf_cmp(const gh_buf *a, const gh_buf *b)
+int strbuf_cmp(const strbuf *a, const strbuf *b)
{
int result = memcmp(a->ptr, b->ptr, MIN(a->size, b->size));
return (result != 0) ? result :
(a->size < b->size) ? -1 : (a->size > b->size) ? 1 : 0;
}
-int gh_buf_strchr(const gh_buf *buf, int c, int pos)
+int strbuf_strchr(const strbuf *buf, int c, int pos)
{
const unsigned char *p = memchr(buf->ptr + pos, c, buf->size - pos);
if (!p)
@@ -255,7 +255,7 @@ int gh_buf_strchr(const gh_buf *buf, int c, int pos)
return (int)(p - (const unsigned char *)buf->ptr);
}
-int gh_buf_strrchr(const gh_buf *buf, int c, int pos)
+int strbuf_strrchr(const strbuf *buf, int c, int pos)
{
int i;
@@ -267,7 +267,7 @@ int gh_buf_strrchr(const gh_buf *buf, int c, int pos)
return -1;
}
-void gh_buf_truncate(gh_buf *buf, int len)
+void strbuf_truncate(strbuf *buf, int len)
{
if (len < buf->size) {
buf->size = len;
@@ -275,7 +275,7 @@ void gh_buf_truncate(gh_buf *buf, int len)
}
}
-void gh_buf_drop(gh_buf *buf, int n)
+void strbuf_drop(strbuf *buf, int n)
{
if (n > 0) {
buf->size = buf->size - n;
@@ -286,7 +286,7 @@ void gh_buf_drop(gh_buf *buf, int n)
}
}
-void gh_buf_trim(gh_buf *buf)
+void strbuf_trim(strbuf *buf)
{
int i = 0;
@@ -296,7 +296,7 @@ void gh_buf_trim(gh_buf *buf)
while (i < buf->size && isspace(buf->ptr[i]))
i++;
- gh_buf_drop(buf, i);
+ strbuf_drop(buf, i);
/* rtrim */
while (buf->size > 0) {
diff --git a/src/buffer.h b/src/buffer.h
index 0d5143e..6f45cbb 100644
--- a/src/buffer.h
+++ b/src/buffer.h
@@ -9,20 +9,20 @@
typedef struct {
unsigned char *ptr;
int asize, size;
-} gh_buf;
+} strbuf;
-extern unsigned char gh_buf__initbuf[];
-extern unsigned char gh_buf__oom[];
+extern unsigned char strbuf__initbuf[];
+extern unsigned char strbuf__oom[];
-#define GH_BUF_INIT { gh_buf__initbuf, 0, 0 }
+#define GH_BUF_INIT { strbuf__initbuf, 0, 0 }
/**
- * Initialize a gh_buf structure.
+ * Initialize a strbuf structure.
*
* For the cases where GH_BUF_INIT cannot be used to do static
* initialization.
*/
-extern void gh_buf_init(gh_buf *buf, int initial_size);
+extern void strbuf_init(strbuf *buf, int initial_size);
/**
* Attempt to grow the buffer to hold at least `target_size` bytes.
@@ -32,7 +32,7 @@ extern void gh_buf_init(gh_buf *buf, int initial_size);
* existing buffer content will be preserved, but calling code must handle
* that buffer was not expanded.
*/
-extern int gh_buf_try_grow(gh_buf *buf, int target_size, bool mark_oom);
+extern int strbuf_try_grow(strbuf *buf, int target_size, bool mark_oom);
/**
* Grow the buffer to hold at least `target_size` bytes.
@@ -42,71 +42,71 @@ extern int gh_buf_try_grow(gh_buf *buf, int target_size, bool mark_oom);
*
* @return 0 on success or -1 on failure
*/
-static inline int gh_buf_grow(gh_buf *buf, int target_size)
+static inline int strbuf_grow(strbuf *buf, int target_size)
{
- return gh_buf_try_grow(buf, target_size, true);
+ return strbuf_try_grow(buf, target_size, true);
}
-extern void gh_buf_free(gh_buf *buf);
-extern void gh_buf_swap(gh_buf *buf_a, gh_buf *buf_b);
+extern void strbuf_free(strbuf *buf);
+extern void strbuf_swap(strbuf *buf_a, strbuf *buf_b);
/**
- * Test if there have been any reallocation failures with this gh_buf.
+ * Test if there have been any reallocation failures with this strbuf.
*
- * Any function that writes to a gh_buf can fail due to memory allocation
- * issues. If one fails, the gh_buf will be marked with an OOM error and
- * further calls to modify the buffer will fail. Check gh_buf_oom() at the
+ * Any function that writes to a strbuf can fail due to memory allocation
+ * issues. If one fails, the strbuf will be marked with an OOM error and
+ * further calls to modify the buffer will fail. Check strbuf_oom() at the
* end of your sequence and it will be true if you ran out of memory at any
* point with that buffer.
*
* @return false if no error, true if allocation error
*/
-static inline bool gh_buf_oom(const gh_buf *buf)
+static inline bool strbuf_oom(const strbuf *buf)
{
- return (buf->ptr == gh_buf__oom);
+ return (buf->ptr == strbuf__oom);
}
-static inline size_t gh_buf_len(const gh_buf *buf)
+static inline size_t strbuf_len(const strbuf *buf)
{
return buf->size;
}
-extern int gh_buf_cmp(const gh_buf *a, const gh_buf *b);
+extern int strbuf_cmp(const strbuf *a, const strbuf *b);
-extern void gh_buf_attach(gh_buf *buf, unsigned char *ptr, int asize);
-extern unsigned char *gh_buf_detach(gh_buf *buf);
-extern void gh_buf_copy_cstr(char *data, int datasize, const gh_buf *buf);
+extern void strbuf_attach(strbuf *buf, unsigned char *ptr, int asize);
+extern unsigned char *strbuf_detach(strbuf *buf);
+extern void strbuf_copy_cstr(char *data, int datasize, const strbuf *buf);
-static inline const char *gh_buf_cstr(const gh_buf *buf)
+static inline const char *strbuf_cstr(const strbuf *buf)
{
return (char *)buf->ptr;
}
-#define gh_buf_at(buf, n) ((buf)->ptr[n])
+#define strbuf_at(buf, n) ((buf)->ptr[n])
/*
* Functions below that return int value error codes will return 0 on
* success or -1 on failure (which generally means an allocation failed).
- * Using a gh_buf where the allocation has failed with result in -1 from
+ * Using a strbuf where the allocation has failed with result in -1 from
* all further calls using that buffer. As a result, you can ignore the
* return code of these functions and call them in a series then just call
- * gh_buf_oom at the end.
+ * strbuf_oom at the end.
*/
-extern int gh_buf_set(gh_buf *buf, const unsigned char *data, int len);
-extern int gh_buf_sets(gh_buf *buf, const char *string);
-extern int gh_buf_putc(gh_buf *buf, int c);
-extern int gh_buf_put(gh_buf *buf, const unsigned char *data, int len);
-extern int gh_buf_puts(gh_buf *buf, const char *string);
-extern int gh_buf_printf(gh_buf *buf, const char *format, ...)
+extern int strbuf_set(strbuf *buf, const unsigned char *data, int len);
+extern int strbuf_sets(strbuf *buf, const char *string);
+extern int strbuf_putc(strbuf *buf, int c);
+extern int strbuf_put(strbuf *buf, const unsigned char *data, int len);
+extern int strbuf_puts(strbuf *buf, const char *string);
+extern int strbuf_printf(strbuf *buf, const char *format, ...)
__attribute__((format (printf, 2, 3)));
-extern int gh_buf_vprintf(gh_buf *buf, const char *format, va_list ap);
-extern void gh_buf_clear(gh_buf *buf);
-
-int gh_buf_strchr(const gh_buf *buf, int c, int pos);
-int gh_buf_strrchr(const gh_buf *buf, int c, int pos);
-void gh_buf_drop(gh_buf *buf, int n);
-void gh_buf_truncate(gh_buf *buf, int len);
-void gh_buf_trim(gh_buf *buf);
+extern int strbuf_vprintf(strbuf *buf, const char *format, va_list ap);
+extern void strbuf_clear(strbuf *buf);
+
+int strbuf_strchr(const strbuf *buf, int c, int pos);
+int strbuf_strrchr(const strbuf *buf, int c, int pos);
+void strbuf_drop(strbuf *buf, int n);
+void strbuf_truncate(strbuf *buf, int len);
+void strbuf_trim(strbuf *buf);
#endif
diff --git a/src/chunk.h b/src/chunk.h
index f3841ed..f37a2f3 100644
--- a/src/chunk.h
+++ b/src/chunk.h
@@ -78,12 +78,12 @@ static inline chunk chunk_dup(const chunk *ch, int pos, int len)
return c;
}
-static inline chunk chunk_buf_detach(gh_buf *buf)
+static inline chunk chunk_buf_detach(strbuf *buf)
{
chunk c;
c.len = buf->size;
- c.data = gh_buf_detach(buf);
+ c.data = strbuf_detach(buf);
c.alloc = 1;
return c;
diff --git a/src/html/houdini.h b/src/html/houdini.h
index 31fe917..1e54d20 100644
--- a/src/html/houdini.h
+++ b/src/html/houdini.h
@@ -25,17 +25,17 @@ extern "C" {
#define HOUDINI_ESCAPED_SIZE(x) (((x) * 12) / 10)
#define HOUDINI_UNESCAPED_SIZE(x) (x)
-extern int houdini_escape_html(gh_buf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure);
-extern int houdini_unescape_html(gh_buf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_xml(gh_buf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_uri(gh_buf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_url(gh_buf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size);
-extern int houdini_unescape_uri(gh_buf *ob, const uint8_t *src, size_t size);
-extern int houdini_unescape_url(gh_buf *ob, const uint8_t *src, size_t size);
-extern int houdini_escape_js(gh_buf *ob, const uint8_t *src, size_t size);
-extern int houdini_unescape_js(gh_buf *ob, const uint8_t *src, size_t size);
+extern int houdini_escape_html(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_escape_html0(strbuf *ob, const uint8_t *src, size_t size, int secure);
+extern int houdini_unescape_html(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_escape_xml(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_escape_uri(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_escape_url(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_escape_href(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_unescape_uri(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_unescape_url(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_escape_js(strbuf *ob, const uint8_t *src, size_t size);
+extern int houdini_unescape_js(strbuf *ob, const uint8_t *src, size_t size);
#ifdef __cplusplus
}
diff --git a/src/html/houdini_href_e.c b/src/html/houdini_href_e.c
index b2a7d79..12456ce 100644
--- a/src/html/houdini_href_e.c
+++ b/src/html/houdini_href_e.c
@@ -49,7 +49,7 @@ static const char HREF_SAFE[] = {
};
int
-houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size)
+houdini_escape_href(strbuf *ob, const uint8_t *src, size_t size)
{
static const uint8_t hex_chars[] = "0123456789ABCDEF";
size_t i = 0, org;
@@ -63,7 +63,7 @@ houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size)
i++;
if (likely(i > org))
- gh_buf_put(ob, src + org, i - org);
+ strbuf_put(ob, src + org, i - org);
/* escaping */
if (i >= size)
@@ -73,14 +73,14 @@ houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size)
/* amp appears all the time in URLs, but needs
* HTML-entity escaping to be inside an href */
case '&':
- gh_buf_puts(ob, "&");
+ strbuf_puts(ob, "&");
break;
/* the single quote is a valid URL character
* according to the standard; it needs HTML
* entity escaping too */
case '\'':
- gh_buf_puts(ob, "'");
+ strbuf_puts(ob, "'");
break;
/* the space can be escaped to %20 or a plus
@@ -89,7 +89,7 @@ houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size)
* when building GET strings */
#if 0
case ' ':
- gh_buf_putc(ob, '+');
+ strbuf_putc(ob, '+');
break;
#endif
@@ -97,7 +97,7 @@ houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size)
default:
hex_str[1] = hex_chars[(src[i] >> 4) & 0xF];
hex_str[2] = hex_chars[src[i] & 0xF];
- gh_buf_put(ob, hex_str, 3);
+ strbuf_put(ob, hex_str, 3);
}
i++;
diff --git a/src/html/houdini_html_e.c b/src/html/houdini_html_e.c
index 95b6c41..f2e86fe 100644
--- a/src/html/houdini_html_e.c
+++ b/src/html/houdini_html_e.c
@@ -45,7 +45,7 @@ static const char *HTML_ESCAPES[] = {
};
int
-houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure)
+houdini_escape_html0(strbuf *ob, const uint8_t *src, size_t size, int secure)
{
size_t i = 0, org, esc = 0;
@@ -55,7 +55,7 @@ houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure)
i++;
if (i > org)
- gh_buf_put(ob, src + org, i - org);
+ strbuf_put(ob, src + org, i - org);
/* escaping */
if (unlikely(i >= size))
@@ -63,9 +63,9 @@ houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure)
/* The forward slash is only escaped in secure mode */
if ((src[i] == '/' || src[i] == '\'') && !secure) {
- gh_buf_putc(ob, src[i]);
+ strbuf_putc(ob, src[i]);
} else {
- gh_buf_puts(ob, HTML_ESCAPES[esc]);
+ strbuf_puts(ob, HTML_ESCAPES[esc]);
}
i++;
@@ -75,7 +75,7 @@ houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure)
}
int
-houdini_escape_html(gh_buf *ob, const uint8_t *src, size_t size)
+houdini_escape_html(strbuf *ob, const uint8_t *src, size_t size)
{
return houdini_escape_html0(ob, src, size, 1);
}
diff --git a/src/html/html.c b/src/html/html.c
index 41b8fda..a9356dd 100644
--- a/src/html/html.c
+++ b/src/html/html.c
@@ -10,7 +10,7 @@
// Functions to convert block and inline lists to HTML strings.
-static void escape_html(gh_buf *dest, const unsigned char *source, int length)
+static void escape_html(strbuf *dest, const unsigned char *source, int length)
{
if (length < 0)
length = strlen((char *)source);
@@ -18,7 +18,7 @@ static void escape_html(gh_buf *dest, const unsigned char *source, int length)
houdini_escape_html0(dest, source, (size_t)length, 0);
}
-static void escape_href(gh_buf *dest, const unsigned char *source, int length)
+static void escape_href(strbuf *dest, const unsigned char *source, int length)
{
if (length < 0)
length = strlen((char *)source);
@@ -26,14 +26,14 @@ static void escape_href(gh_buf *dest, const unsigned char *source, int length)
houdini_escape_href(dest, source, (size_t)length);
}
-static inline void cr(gh_buf *html)
+static inline void cr(strbuf *html)
{
if (html->size && html->ptr[html->size - 1] != '\n')
- gh_buf_putc(html, '\n');
+ strbuf_putc(html, '\n');
}
// Convert a block list to HTML. Returns 0 on success, and sets result.
-void blocks_to_html(gh_buf *html, block *b, bool tight)
+void blocks_to_html(strbuf *html, block *b, bool tight)
{
struct ListData *data;
@@ -48,25 +48,25 @@ void blocks_to_html(gh_buf *html, block *b, bool tight)
inlines_to_html(html, b->inline_content);
} else {
cr(html);
- gh_buf_puts(html, "");
+ strbuf_puts(html, "
");
inlines_to_html(html, b->inline_content);
- gh_buf_puts(html, "
\n");
+ strbuf_puts(html, "
\n");
}
break;
case block_quote:
cr(html);
- gh_buf_puts(html, "\n");
+ strbuf_puts(html, "\n");
blocks_to_html(html, b->children, false);
- gh_buf_puts(html, "
\n");
+ strbuf_puts(html, "
\n");
break;
case list_item:
cr(html);
- gh_buf_puts(html, "");
+ strbuf_puts(html, "");
blocks_to_html(html, b->children, tight);
- gh_buf_trim(html); /* TODO: rtrim */
- gh_buf_puts(html, "\n");
+ strbuf_trim(html); /* TODO: rtrim */
+ strbuf_puts(html, "\n");
break;
case list:
@@ -75,58 +75,58 @@ void blocks_to_html(gh_buf *html, block *b, bool tight)
data = &(b->attributes.list_data);
if (data->start > 1) {
- gh_buf_printf(html, "<%s start=\"%d\">\n",
+ strbuf_printf(html, "<%s start=\"%d\">\n",
data->list_type == bullet ? "ul" : "ol",
data->start);
} else {
- gh_buf_puts(html, data->list_type == bullet ? "\n" : "\n");
+ strbuf_puts(html, data->list_type == bullet ? "\n" : "\n");
}
blocks_to_html(html, b->children, data->tight);
- gh_buf_puts(html, data->list_type == bullet ? "
" : "
");
- gh_buf_putc(html, '\n');
+ strbuf_puts(html, data->list_type == bullet ? "
" : "");
+ strbuf_putc(html, '\n');
break;
case atx_header:
case setext_header:
cr(html);
- gh_buf_printf(html, "", b->attributes.header_level);
+ strbuf_printf(html, "", b->attributes.header_level);
inlines_to_html(html, b->inline_content);
- gh_buf_printf(html, "\n", b->attributes.header_level);
+ strbuf_printf(html, "\n", b->attributes.header_level);
break;
case indented_code:
case fenced_code:
cr(html);
- gh_buf_puts(html, "tag == fenced_code) {
- gh_buf *info = &b->attributes.fenced_code_data.info;
+ strbuf *info = &b->attributes.fenced_code_data.info;
- if (gh_buf_len(info) > 0) {
- int first_tag = gh_buf_strchr(info, ' ', 0);
+ if (strbuf_len(info) > 0) {
+ int first_tag = strbuf_strchr(info, ' ', 0);
if (first_tag < 0)
- first_tag = gh_buf_len(info);
+ first_tag = strbuf_len(info);
- gh_buf_puts(html, " class=\"");
+ strbuf_puts(html, " class=\"");
escape_html(html, info->ptr, first_tag);
- gh_buf_putc(html, '"');
+ strbuf_putc(html, '"');
}
}
- gh_buf_puts(html, ">");
+ strbuf_puts(html, ">");
escape_html(html, b->string_content.ptr, b->string_content.size);
- gh_buf_puts(html, "
\n");
+ strbuf_puts(html, "\n");
break;
case html_block:
- gh_buf_put(html, b->string_content.ptr, b->string_content.size);
+ strbuf_put(html, b->string_content.ptr, b->string_content.size);
break;
case hrule:
- gh_buf_puts(html, "
\n");
+ strbuf_puts(html, "
\n");
break;
case reference_def:
@@ -141,9 +141,9 @@ void blocks_to_html(gh_buf *html, block *b, bool tight)
}
// Convert an inline list to HTML. Returns 0 on success, and sets result.
-void inlines_to_html(gh_buf *html, inl* ils)
+void inlines_to_html(strbuf *html, inl* ils)
{
- gh_buf scrap = GH_BUF_INIT;
+ strbuf scrap = GH_BUF_INIT;
while(ils != NULL) {
switch(ils->tag) {
@@ -152,70 +152,70 @@ void inlines_to_html(gh_buf *html, inl* ils)
break;
case INL_LINEBREAK:
- gh_buf_puts(html, "
\n");
+ strbuf_puts(html, "
\n");
break;
case INL_SOFTBREAK:
- gh_buf_putc(html, '\n');
+ strbuf_putc(html, '\n');
break;
case INL_CODE:
- gh_buf_puts(html, "");
+ strbuf_puts(html, "");
escape_html(html, ils->content.literal.data, ils->content.literal.len);
- gh_buf_puts(html, "
");
+ strbuf_puts(html, "
");
break;
case INL_RAW_HTML:
case INL_ENTITY:
- gh_buf_put(html,
+ strbuf_put(html,
ils->content.literal.data,
ils->content.literal.len);
break;
case INL_LINK:
- gh_buf_puts(html, "content.linkable.url)
escape_href(html, ils->content.linkable.url, -1);
if (ils->content.linkable.title) {
- gh_buf_puts(html, "\" title=\"");
+ strbuf_puts(html, "\" title=\"");
escape_html(html, ils->content.linkable.title, -1);
}
- gh_buf_puts(html, "\">");
+ strbuf_puts(html, "\">");
inlines_to_html(html, ils->content.inlines);
- gh_buf_puts(html, "");
+ strbuf_puts(html, "");
break;
case INL_IMAGE:
- gh_buf_puts(html, "content.linkable.url)
escape_href(html, ils->content.linkable.url, -1);
inlines_to_html(&scrap, ils->content.inlines);
- gh_buf_puts(html, "\" alt=\"");
+ strbuf_puts(html, "\" alt=\"");
if (scrap.size)
escape_html(html, scrap.ptr, scrap.size);
- gh_buf_clear(&scrap);
+ strbuf_clear(&scrap);
if (ils->content.linkable.title) {
- gh_buf_puts(html, "\" title=\"");
+ strbuf_puts(html, "\" title=\"");
escape_html(html, ils->content.linkable.title, -1);
}
- gh_buf_puts(html, "\"/>");
+ strbuf_puts(html, "\"/>");
break;
case INL_STRONG:
- gh_buf_puts(html, "");
+ strbuf_puts(html, "");
inlines_to_html(html, ils->content.inlines);
- gh_buf_puts(html, "");
+ strbuf_puts(html, "");
break;
case INL_EMPH:
- gh_buf_puts(html, "");
+ strbuf_puts(html, "");
inlines_to_html(html, ils->content.inlines);
- gh_buf_puts(html, "");
+ strbuf_puts(html, "");
break;
}
ils = ils->next;
diff --git a/src/inlines.c b/src/inlines.c
index 8e2e683..33973df 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -25,7 +25,7 @@ inline static void chunk_free(chunk *c);
inline static void chunk_trim(chunk *c);
inline static chunk chunk_literal(const char *data);
-inline static chunk chunk_buf_detach(gh_buf *buf);
+inline static chunk chunk_buf_detach(strbuf *buf);
inline static chunk chunk_dup(const chunk *ch, int pos, int len);
static inl *parse_chunk_inlines(chunk *chunk, reference** refmap);
@@ -33,10 +33,10 @@ static inl *parse_inlines_while(subject* subj, int (*f)(subject*));
static int parse_inline(subject* subj, inl ** last);
static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap);
-static void subject_from_buf(subject *e, gh_buf *buffer, reference** refmap);
+static void subject_from_buf(subject *e, strbuf *buffer, reference** refmap);
static int subject_find_special_char(subject *subj);
-static void normalize_whitespace(gh_buf *s);
+static void normalize_whitespace(strbuf *s);
extern void free_reference(reference *ref) {
free(ref->label);
@@ -62,13 +62,13 @@ extern void free_reference_map(reference **refmap) {
// remove leading/trailing whitespace, case fold
static unsigned char *normalize_reference(chunk *ref)
{
- gh_buf normalized = GH_BUF_INIT;
+ strbuf normalized = GH_BUF_INIT;
utf8proc_case_fold(&normalized, ref->data, ref->len);
- gh_buf_trim(&normalized);
+ strbuf_trim(&normalized);
normalize_whitespace(&normalized);
- return gh_buf_detach(&normalized);
+ return strbuf_detach(&normalized);
}
// Returns reference if refmap contains a reference with matching
@@ -218,7 +218,7 @@ inline static inl* append_inlines(inl* a, inl* b)
return a;
}
-static void subject_from_buf(subject *e, gh_buf *buffer, reference** refmap)
+static void subject_from_buf(subject *e, strbuf *buffer, reference** refmap)
{
e->input.data = buffer->ptr;
e->input.len = buffer->size;
@@ -309,7 +309,7 @@ static int scan_to_closing_backticks(subject* subj, int openticklength)
// Destructively modify string, collapsing consecutive
// space and newline characters into a single space.
-static void normalize_whitespace(gh_buf *s)
+static void normalize_whitespace(strbuf *s)
{
bool last_char_was_space = false;
int r, w;
@@ -331,7 +331,7 @@ static void normalize_whitespace(gh_buf *s)
}
}
- gh_buf_truncate(s, w);
+ strbuf_truncate(s, w);
}
// Parse backtick code section or raw backticks, return an inline.
@@ -346,10 +346,10 @@ static inl* handle_backticks(subject *subj)
subj->pos = startpos; // rewind
return make_str(openticks);
} else {
- gh_buf buf = GH_BUF_INIT;
+ strbuf buf = GH_BUF_INIT;
- gh_buf_set(&buf, subj->input.data + startpos, endpos - startpos - openticks.len);
- gh_buf_trim(&buf);
+ strbuf_set(&buf, subj->input.data + startpos, endpos - startpos - openticks.len);
+ strbuf_trim(&buf);
normalize_whitespace(&buf);
return make_code(chunk_buf_detach(&buf));
@@ -569,7 +569,7 @@ static inl *make_str_with_entities(chunk *content)
}
// Destructively unescape a string: remove backslashes before punctuation chars.
-extern void unescape_buffer(gh_buf *buf)
+extern void unescape_buffer(strbuf *buf)
{
int r, w;
@@ -580,14 +580,14 @@ extern void unescape_buffer(gh_buf *buf)
buf->ptr[w++] = buf->ptr[r];
}
- gh_buf_truncate(buf, w);
+ strbuf_truncate(buf, w);
}
// Clean a URL: remove surrounding whitespace and surrounding <>,
// and remove \ that escape punctuation.
static unsigned char *clean_url(chunk *url, int is_email)
{
- gh_buf buf = GH_BUF_INIT;
+ strbuf buf = GH_BUF_INIT;
chunk_trim(url);
@@ -595,22 +595,22 @@ static unsigned char *clean_url(chunk *url, int is_email)
return NULL;
if (is_email)
- gh_buf_puts(&buf, "mailto:");
+ strbuf_puts(&buf, "mailto:");
if (url->data[0] == '<' && url->data[url->len - 1] == '>') {
- gh_buf_put(&buf, url->data + 1, url->len - 2);
+ strbuf_put(&buf, url->data + 1, url->len - 2);
} else {
- gh_buf_put(&buf, url->data, url->len);
+ strbuf_put(&buf, url->data, url->len);
}
unescape_buffer(&buf);
- return gh_buf_detach(&buf);
+ return strbuf_detach(&buf);
}
// Clean a title: remove surrounding quotes and remove \ that escape punctuation.
static unsigned char *clean_title(chunk *title)
{
- gh_buf buf = GH_BUF_INIT;
+ strbuf buf = GH_BUF_INIT;
unsigned char first, last;
if (title->len == 0)
@@ -623,13 +623,13 @@ static unsigned char *clean_title(chunk *title)
if ((first == '\'' && last == '\'') ||
(first == '(' && last == ')') ||
(first == '"' && last == '"')) {
- gh_buf_set(&buf, title->data + 1, title->len - 2);
+ strbuf_set(&buf, title->data + 1, title->len - 2);
} else {
- gh_buf_set(&buf, title->data, title->len);
+ strbuf_set(&buf, title->data, title->len);
}
unescape_buffer(&buf);
- return gh_buf_detach(&buf);
+ return strbuf_detach(&buf);
}
// Parse an autolink or HTML tag.
@@ -971,7 +971,7 @@ static int parse_inline(subject* subj, inl ** last)
return 1;
}
-extern inl* parse_inlines(gh_buf *input, reference** refmap)
+extern inl* parse_inlines(strbuf *input, reference** refmap)
{
subject subj;
subject_from_buf(&subj, input, refmap);
@@ -993,7 +993,7 @@ void spnl(subject* subj)
// Modify refmap if a reference is encountered.
// Return 0 if no reference found, otherwise position of subject
// after reference is parsed.
-extern int parse_reference(gh_buf *input, reference** refmap)
+extern int parse_reference(strbuf *input, reference** refmap)
{
subject subj;
diff --git a/src/main.c b/src/main.c
index e1abedc..7cf67e2 100644
--- a/src/main.c
+++ b/src/main.c
@@ -14,14 +14,14 @@ void print_usage()
static void print_document(block *document, bool ast)
{
- gh_buf html = GH_BUF_INIT;
+ strbuf html = GH_BUF_INIT;
if (ast) {
print_blocks(document, 0);
} else {
blocks_to_html(&html, document, false);
printf("%s", html.ptr);
- gh_buf_free(&html);
+ strbuf_free(&html);
}
}
diff --git a/src/stmd.h b/src/stmd.h
index 4a3c399..2e86f3a 100644
--- a/src/stmd.h
+++ b/src/stmd.h
@@ -50,7 +50,7 @@ struct FencedCodeData {
int fence_length;
int fence_offset;
char fence_char;
- gh_buf info;
+ strbuf info;
};
typedef struct Block {
@@ -76,7 +76,7 @@ typedef struct Block {
struct Block* last_child;
struct Block* parent;
struct Block* top;
- gh_buf string_content;
+ strbuf string_content;
inl* inline_content;
union {
struct ListData list_data;
@@ -88,15 +88,15 @@ typedef struct Block {
struct Block * prev;
} block;
-inl* parse_inlines(gh_buf *input, reference** refmap);
+inl* parse_inlines(strbuf *input, reference** refmap);
void free_inlines(inl* e);
-int parse_reference(gh_buf *input, reference** refmap);
+int parse_reference(strbuf *input, reference** refmap);
void free_reference(reference *ref);
void free_reference_map(reference **refmap);
void add_reference(reference** refmap, reference* ref);
-void unescape_buffer(gh_buf *buf);
+void unescape_buffer(strbuf *buf);
extern block* make_document();
extern block* add_child(block* parent,
@@ -109,9 +109,9 @@ extern block *stmd_parse_file(FILE *f);
void print_inlines(inl* ils, int indent);
void print_blocks(block* blk, int indent);
-void blocks_to_html(gh_buf *html, block *b, bool tight);
-void inlines_to_html(gh_buf *html, inl *b);
+void blocks_to_html(strbuf *html, block *b, bool tight);
+void inlines_to_html(strbuf *html, inl *b);
-void utf8proc_case_fold(gh_buf *dest, const unsigned char *str, int len);
+void utf8proc_case_fold(strbuf *dest, const unsigned char *str, int len);
#endif
diff --git a/src/utf8.c b/src/utf8.c
index 32c78a4..cebd872 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -84,7 +84,7 @@ ssize_t utf8proc_iterate(const uint8_t *str, ssize_t str_len, int32_t *dst)
return length;
}
-void utf8proc_encode_char(int32_t uc, gh_buf *buf)
+void utf8proc_encode_char(int32_t uc, strbuf *buf)
{
unsigned char dst[4];
int len = 0;
@@ -119,10 +119,10 @@ void utf8proc_encode_char(int32_t uc, gh_buf *buf)
assert(false);
}
- gh_buf_put(buf, dst, len);
+ strbuf_put(buf, dst, len);
}
-void utf8proc_case_fold(gh_buf *dest, const unsigned char *str, int len)
+void utf8proc_case_fold(strbuf *dest, const unsigned char *str, int len)
{
int32_t c;
--
cgit v1.2.3
From 61e3e606e64221eaa5cf3d83dc598d5a42818d10 Mon Sep 17 00:00:00 2001
From: Vicent Marti
Date: Sat, 6 Sep 2014 20:48:05 +0200
Subject: UTF8-aware detabbing and entity handling
---
Makefile | 13 ++++++-----
src/blocks.c | 35 ++++++------------------------
src/html/houdini.h | 2 ++
src/html/html.c | 1 -
src/inlines.c | 63 ++++++++++++++++++++++--------------------------------
src/print.c | 5 -----
src/stmd.h | 3 ---
src/utf8.c | 59 ++++++++++++++++++++++++++++++++++++++++++++------
8 files changed, 95 insertions(+), 86 deletions(-)
(limited to 'src/html/houdini.h')
diff --git a/Makefile b/Makefile
index 0d2eb8b..b5e487d 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
-CFLAGS=-g -O3 -Wall -Wextra -std=c99 -Isrc $(OPTFLAGS)
-LDFLAGS=-g -O3 -Wall -Werror
+CFLAGS=-g -pg -O3 -Wall -Wextra -std=c99 -Isrc $(OPTFLAGS)
+LDFLAGS=-g -pg -O3 -Wall -Werror
SRCDIR=src
DATADIR=data
@@ -41,11 +41,11 @@ testjs: spec.txt
benchjs:
node js/bench.js ${BENCHINP}
-HTML_OBJ=$(SRCDIR)/html/html.o $(SRCDIR)/html/houdini_href_e.o $(SRCDIR)/html/houdini_html_e.o
+HTML_OBJ=$(SRCDIR)/html/html.o $(SRCDIR)/html/houdini_href_e.o $(SRCDIR)/html/houdini_html_e.o $(SRCDIR)/html/houdini_html_u.o
STMD_OBJ=$(SRCDIR)/inlines.o $(SRCDIR)/buffer.o $(SRCDIR)/blocks.o $(SRCDIR)/scanners.c $(SRCDIR)/print.o $(SRCDIR)/utf8.o
-$(PROG): $(SRCDIR)/main.c $(HTML_OBJ) $(STMD_OBJ)
- $(CC) $(LDFLAGS) -o $@ $^
+$(PROG): $(SRCDIR)/html/html_unescape.h $(SRCDIR)/case_fold_switch.inc $(HTML_OBJ) $(STMD_OBJ) $(SRCDIR)/main.c
+ $(CC) $(LDFLAGS) -o $@ $(HTML_OBJ) $(STMD_OBJ) $(SRCDIR)/main.c
$(SRCDIR)/scanners.c: $(SRCDIR)/scanners.re
re2c --case-insensitive -bis $< > $@ || (rm $@ && false)
@@ -53,6 +53,9 @@ $(SRCDIR)/scanners.c: $(SRCDIR)/scanners.re
$(SRCDIR)/case_fold_switch.inc: $(DATADIR)/CaseFolding-3.2.0.txt
perl mkcasefold.pl < $< > $@
+$(SRCDIR)/html/html_unescape.h: $(SRCDIR)/html/html_unescape.gperf
+ gperf -I -t -N find_entity -H hash_entity -K entity -C -l --null-strings -m5 $< > $@
+
.PHONY: leakcheck clean fuzztest dingus upload
dingus:
diff --git a/src/blocks.c b/src/blocks.c
index f671b5e..8c7d49c 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -5,6 +5,8 @@
#include
#include "stmd.h"
+#include "utf8.h"
+#include "html/houdini.h"
#include "scanners.h"
#include "uthash.h"
@@ -184,7 +186,7 @@ static void finalize(node_block* b, int line_number)
firstlinelen = strbuf_strchr(&b->string_content, '\n', 0);
strbuf_init(&b->attributes.fenced_code_data.info, 0);
- strbuf_set(
+ houdini_unescape_html_f(
&b->attributes.fenced_code_data.info,
b->string_content.ptr,
firstlinelen
@@ -369,31 +371,6 @@ static int lists_match(struct ListData list_data,
list_data.bullet_char == item_data.bullet_char);
}
-static void expand_tabs(strbuf *ob, const unsigned char *line, size_t size)
-{
- size_t i = 0, tab = 0;
-
- while (i < size) {
- size_t org = i;
-
- while (i < size && line[i] != '\t') {
- i++; tab++;
- }
-
- if (i > org)
- strbuf_put(ob, line + org, i - org);
-
- if (i >= size)
- break;
-
- do {
- strbuf_putc(ob, ' '); tab++;
- } while (tab % 4);
-
- i++;
- }
-}
-
static node_block *finalize_document(node_block *document, int linenum)
{
while (document != document->top) {
@@ -415,7 +392,7 @@ extern node_block *stmd_parse_file(FILE *f)
node_block *document = make_document();
while (fgets((char *)buffer, sizeof(buffer), f)) {
- expand_tabs(&line, buffer, strlen((char *)buffer));
+ utf8proc_detab(&line, buffer, strlen((char *)buffer));
incorporate_line(&line, linenum, &document);
strbuf_clear(&line);
linenum++;
@@ -436,10 +413,10 @@ extern node_block *stmd_parse_document(const unsigned char *buffer, size_t len)
const unsigned char *eol = memchr(buffer, '\n', end - buffer);
if (!eol) {
- expand_tabs(&line, buffer, end - buffer);
+ utf8proc_detab(&line, buffer, end - buffer);
buffer = end;
} else {
- expand_tabs(&line, buffer, (eol - buffer) + 1);
+ utf8proc_detab(&line, buffer, (eol - buffer) + 1);
buffer += (eol - buffer) + 1;
}
diff --git a/src/html/houdini.h b/src/html/houdini.h
index 1e54d20..5fd690d 100644
--- a/src/html/houdini.h
+++ b/src/html/houdini.h
@@ -25,9 +25,11 @@ extern "C" {
#define HOUDINI_ESCAPED_SIZE(x) (((x) * 12) / 10)
#define HOUDINI_UNESCAPED_SIZE(x) (x)
+extern size_t houdini_unescape_ent(strbuf *ob, const uint8_t *src, size_t size);
extern int houdini_escape_html(strbuf *ob, const uint8_t *src, size_t size);
extern int houdini_escape_html0(strbuf *ob, const uint8_t *src, size_t size, int secure);
extern int houdini_unescape_html(strbuf *ob, const uint8_t *src, size_t size);
+extern void houdini_unescape_html_f(strbuf *ob, const uint8_t *src, size_t size);
extern int houdini_escape_xml(strbuf *ob, const uint8_t *src, size_t size);
extern int houdini_escape_uri(strbuf *ob, const uint8_t *src, size_t size);
extern int houdini_escape_url(strbuf *ob, const uint8_t *src, size_t size);
diff --git a/src/html/html.c b/src/html/html.c
index 758ec80..595dfcd 100644
--- a/src/html/html.c
+++ b/src/html/html.c
@@ -166,7 +166,6 @@ void inlines_to_html(strbuf *html, node_inl* ils)
break;
case INL_RAW_HTML:
- case INL_ENTITY:
strbuf_put(html,
ils->content.literal.data,
ils->content.literal.len);
diff --git a/src/inlines.c b/src/inlines.c
index 6b17027..7b27150 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -5,6 +5,8 @@
#include
#include "stmd.h"
+#include "html/houdini.h"
+#include "utf8.h"
#include "uthash.h"
#include "scanners.h"
@@ -176,7 +178,6 @@ inline static node_inl* make_simple(int t)
#define make_str(s) make_literal(INL_STRING, s)
#define make_code(s) make_literal(INL_CODE, s)
#define make_raw_html(s) make_literal(INL_RAW_HTML, s)
-#define make_entity(s) make_literal(INL_ENTITY, s)
#define make_linebreak() make_simple(INL_LINEBREAK)
#define make_softbreak() make_simple(INL_SOFTBREAK)
#define make_emph(contents) make_inlines(INL_EMPH, contents)
@@ -191,7 +192,6 @@ extern void free_inlines(node_inl* e)
case INL_STRING:
case INL_RAW_HTML:
case INL_CODE:
- case INL_ENTITY:
chunk_free(&e->content.literal);
break;
case INL_LINEBREAK:
@@ -540,45 +540,34 @@ static node_inl* handle_backslash(subject *subj)
// Assumes the subject has an '&' character at the current position.
static node_inl* handle_entity(subject* subj)
{
- int match;
- node_inl *result;
- match = scan_entity(&subj->input, subj->pos);
- if (match) {
- result = make_entity(chunk_dup(&subj->input, subj->pos, match));
- subj->pos += match;
- } else {
- advance(subj);
- result = make_str(chunk_literal("&"));
- }
- return result;
+ strbuf ent = GH_BUF_INIT;
+ size_t len;
+
+ advance(subj);
+
+ len = houdini_unescape_ent(&ent,
+ subj->input.data + subj->pos,
+ subj->input.len - subj->pos
+ );
+
+ if (len == 0)
+ return make_str(chunk_literal("&"));
+
+ subj->pos += len;
+ return make_str(chunk_buf_detach(&ent));
}
// Like make_str, but parses entities.
// Returns an inline sequence consisting of str and entity elements.
static node_inl *make_str_with_entities(chunk *content)
{
- node_inl *result = NULL;
- node_inl *new;
- int searchpos;
- char c;
- subject subj;
-
- subject_from_chunk(&subj, content, NULL);
+ strbuf unescaped = GH_BUF_INIT;
- while ((c = peek_char(&subj))) {
- switch (c) {
- case '&':
- new = handle_entity(&subj);
- break;
- default:
- searchpos = chunk_strchr(&subj.input, '&', subj.pos);
- new = make_str(chunk_dup(&subj.input, subj.pos, searchpos - subj.pos));
- subj.pos = searchpos;
- }
- result = append_inlines(result, new);
+ if (houdini_unescape_html(&unescaped, content->data, (size_t)content->len)) {
+ return make_str(chunk_buf_detach(&unescaped));
+ } else {
+ return make_str(*content);
}
-
- return result;
}
// Destructively unescape a string: remove backslashes before punctuation chars.
@@ -611,9 +600,9 @@ static unsigned char *clean_url(chunk *url, int is_email)
strbuf_puts(&buf, "mailto:");
if (url->data[0] == '<' && url->data[url->len - 1] == '>') {
- strbuf_put(&buf, url->data + 1, url->len - 2);
+ houdini_unescape_html_f(&buf, url->data + 1, url->len - 2);
} else {
- strbuf_put(&buf, url->data, url->len);
+ houdini_unescape_html_f(&buf, url->data, url->len);
}
unescape_buffer(&buf);
@@ -636,9 +625,9 @@ static unsigned char *clean_title(chunk *title)
if ((first == '\'' && last == '\'') ||
(first == '(' && last == ')') ||
(first == '"' && last == '"')) {
- strbuf_set(&buf, title->data + 1, title->len - 2);
+ houdini_unescape_html_f(&buf, title->data + 1, title->len - 2);
} else {
- strbuf_set(&buf, title->data, title->len);
+ houdini_unescape_html_f(&buf, title->data, title->len);
}
unescape_buffer(&buf);
diff --git a/src/print.c b/src/print.c
index 0ff86fa..9240dac 100644
--- a/src/print.c
+++ b/src/print.c
@@ -145,11 +145,6 @@ extern void print_inlines(node_inl* ils, int indent)
print_str(ils->content.literal.data, ils->content.literal.len);
putchar('\n');
break;
- case INL_ENTITY:
- printf("entity ");
- print_str(ils->content.literal.data, ils->content.literal.len);
- putchar('\n');
- break;
case INL_LINK:
case INL_IMAGE:
printf("%s url=", ils->tag == INL_LINK ? "link" : "image");
diff --git a/src/stmd.h b/src/stmd.h
index be65371..c80eeda 100644
--- a/src/stmd.h
+++ b/src/stmd.h
@@ -17,7 +17,6 @@ struct node_inl {
INL_LINEBREAK,
INL_CODE,
INL_RAW_HTML,
- INL_ENTITY,
INL_EMPH,
INL_STRONG,
INL_LINK,
@@ -133,6 +132,4 @@ void print_blocks(node_block* blk, int indent);
void blocks_to_html(strbuf *html, node_block *b, bool tight);
void inlines_to_html(strbuf *html, node_inl *b);
-void utf8proc_case_fold(strbuf *dest, const unsigned char *str, int len);
-
#endif
diff --git a/src/utf8.c b/src/utf8.c
index cebd872..12d7ba5 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -3,7 +3,7 @@
#include
#include
-#include "stmd.h"
+#include "utf8.h"
static const int8_t utf8proc_utf8class[256] = {
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -23,6 +23,12 @@ static const int8_t utf8proc_utf8class[256] = {
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0 };
+static void encode_unknown(strbuf *buf)
+{
+ static const unsigned char repl[] = {239, 191, 189};
+ strbuf_put(buf, repl, 3);
+}
+
ssize_t utf8proc_charlen(const uint8_t *str, ssize_t str_len)
{
ssize_t length, i;
@@ -46,6 +52,46 @@ ssize_t utf8proc_charlen(const uint8_t *str, ssize_t str_len)
return length;
}
+void utf8proc_detab(strbuf *ob, const unsigned char *line, size_t size)
+{
+ static const unsigned char whitespace[] = " ";
+
+ size_t i = 0, tab = 0;
+
+ while (i < size) {
+ size_t org = i;
+
+ while (i < size && line[i] != '\t' && line[i] <= 0x80) {
+ i++; tab++;
+ }
+
+ if (i > org)
+ strbuf_put(ob, line + org, i - org);
+
+ if (i >= size)
+ break;
+
+ if (line[i] == '\t') {
+ int numspaces = 4 - (tab % 4);
+ strbuf_put(ob, whitespace, numspaces);
+ i += 1;
+ tab += numspaces;
+ } else {
+ ssize_t charlen = utf8proc_charlen(line + i, size - i);
+
+ if (charlen < 0) {
+ encode_unknown(ob);
+ i++;
+ } else {
+ strbuf_put(ob, line + i, charlen);
+ i += charlen;
+ }
+
+ tab += 1;
+ }
+ }
+}
+
ssize_t utf8proc_iterate(const uint8_t *str, ssize_t str_len, int32_t *dst)
{
ssize_t length;
@@ -89,9 +135,9 @@ void utf8proc_encode_char(int32_t uc, strbuf *buf)
unsigned char dst[4];
int len = 0;
- if (uc < 0x00) {
- assert(false);
- } else if (uc < 0x80) {
+ assert(uc >= 0);
+
+ if (uc < 0x80) {
dst[0] = uc;
len = 1;
} else if (uc < 0x800) {
@@ -116,7 +162,8 @@ void utf8proc_encode_char(int32_t uc, strbuf *buf)
dst[3] = 0x80 + (uc & 0x3F);
len = 4;
} else {
- assert(false);
+ encode_unknown(buf);
+ return;
}
strbuf_put(buf, dst, len);
@@ -133,7 +180,7 @@ void utf8proc_case_fold(strbuf *dest, const unsigned char *str, int len)
ssize_t char_len = utf8proc_iterate(str, len, &c);
if (char_len < 0) {
- bufpush(0xFFFD);
+ encode_unknown(dest);
continue;
}
--
cgit v1.2.3