aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Makefile19
-rw-r--r--src/blocks.c1352
-rw-r--r--src/bstrlib.c2979
-rw-r--r--src/bstrlib.h304
-rw-r--r--src/buffer.c313
-rw-r--r--src/buffer.h119
-rw-r--r--src/case_fold_switch.inc (renamed from src/case_fold_switch.c)0
-rw-r--r--src/casefold.c2699
-rw-r--r--src/detab.c48
-rw-r--r--src/getopt.c199
-rw-r--r--src/inlines.c1711
-rw-r--r--src/main.c2
-rw-r--r--src/scanners.h28
-rw-r--r--src/scanners.re54
-rw-r--r--src/stmd.h76
-rw-r--r--src/utf8.c221
16 files changed, 2252 insertions, 7872 deletions
diff --git a/Makefile b/Makefile
index 55b6645..cb5938d 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
-CFLAGS=-g -O3 -Wall -Wextra -std=c99 -Isrc $(OPTFLAGS)
-LDFLAGS=-g -O3 -Wall -Werror
+CFLAGS=-ggdb3 -O0 -Wall -Wextra -std=c99 -Isrc $(OPTFLAGS)
+LDFLAGS=-ggdb3 -O0 -Wall -Werror
SRCDIR=src
DATADIR=data
@@ -41,13 +41,13 @@ testjs: spec.txt
benchjs:
node js/bench.js ${BENCHINP}
-$(PROG): $(SRCDIR)/main.c $(SRCDIR)/inlines.o $(SRCDIR)/blocks.o $(SRCDIR)/detab.o $(SRCDIR)/bstrlib.o $(SRCDIR)/scanners.o $(SRCDIR)/print.o $(SRCDIR)/html.o $(SRCDIR)/utf8.o
+$(PROG): $(SRCDIR)/main.c $(SRCDIR)/inlines.o $(SRCDIR)/buffer.o $(SRCDIR)/blocks.o $(SRCDIR)/scanners.c $(SRCDIR)/print.o $(SRCDIR)/html.o $(SRCDIR)/utf8.o
$(CC) $(LDFLAGS) -o $@ $^
$(SRCDIR)/scanners.c: $(SRCDIR)/scanners.re
re2c --case-insensitive -bis $< > $@ || (rm $@ && false)
-$(SRCDIR)/case_fold_switch.c: $(DATADIR)/CaseFolding-3.2.0.txt
+$(SRCDIR)/case_fold_switch.inc: $(DATADIR)/CaseFolding-3.2.0.txt
perl mkcasefold.pl < $< > $@
.PHONY: leakcheck clean fuzztest dingus upload
@@ -58,6 +58,9 @@ dingus:
leakcheck: $(PROG)
cat oldtests/*/*.markdown | valgrind --leak-check=full --dsymutil=yes $(PROG)
+operf: $(PROG)
+ operf $(PROG) <bench.md >/dev/null
+
fuzztest:
for i in `seq 1 10`; do \
time cat /dev/urandom | head -c 100000 | iconv -f latin1 -t utf-8 | $(PROG) >/dev/null; done
@@ -69,7 +72,7 @@ update-site: spec.html narrative.html
(cd _site ; git pull ; git commit -a -m "Updated site for latest spec, narrative, js" ; git push; cd ..)
clean:
- -rm test $(SRCDIR)/*.o $(SRCDIR)/scanners.c
- -rm -r *.dSYM
- -rm README.html
- -rm spec.md fuzz.txt spec.html
+ -rm -f test $(SRCDIR)/*.o $(SRCDIR)/scanners.c
+ -rm -rf *.dSYM
+ -rm -f README.html
+ -rm -f spec.md fuzz.txt spec.html
diff --git a/src/blocks.c b/src/blocks.c
index 2776231..eabac03 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -1,4 +1,5 @@
#include <stdlib.h>
+#include <assert.h>
#include <stdio.h>
#include <stdbool.h>
#include <ctype.h>
@@ -10,738 +11,767 @@
static block* make_block(int tag, int start_line, int start_column)
{
- block* e;
- e = (block*) malloc(sizeof(block));
- e->tag = tag;
- e->open = true;
- e->last_line_blank = false;
- e->start_line = start_line;
- e->start_column = start_column;
- e->end_line = start_line;
- e->children = NULL;
- e->last_child = NULL;
- e->parent = NULL;
- e->top = NULL;
- e->attributes.refmap = NULL;
- e->string_content = bfromcstr("");
- e->inline_content = NULL;
- e->next = NULL;
- e->prev = NULL;
- return e;
+ block* e;
+ e = (block*) malloc(sizeof(block));
+ e->tag = tag;
+ e->open = true;
+ e->last_line_blank = false;
+ e->start_line = start_line;
+ e->start_column = start_column;
+ e->end_line = start_line;
+ e->children = NULL;
+ e->last_child = NULL;
+ e->parent = NULL;
+ e->top = NULL;
+ e->attributes.refmap = NULL;
+ gh_buf_init(&e->string_content, 32);
+ e->string_pos = 0;
+ e->inline_content = NULL;
+ e->next = NULL;
+ e->prev = NULL;
+ return e;
}
// Create a root document block.
extern block* make_document()
{
- block * e = make_block(document, 1, 1);
- reference * map = NULL;
- reference ** refmap;
- refmap = (reference**) malloc(sizeof(reference*));
- *refmap = map;
- e->attributes.refmap = refmap;
- e->top = e;
- return e;
+ block * e = make_block(document, 1, 1);
+ reference * map = NULL;
+ reference ** refmap;
+ refmap = (reference**) malloc(sizeof(reference*));
+ *refmap = map;
+ e->attributes.refmap = refmap;
+ e->top = e;
+ return e;
}
// Returns true if line has only space characters, else false.
-bool is_blank(bstring s, int offset)
+bool is_blank(gh_buf *s, int offset)
{
- char c;
- while ((c = bchar(s, offset))) {
- if (c == '\n') {
- return true;
- } else if (c == ' ') {
- offset++;
- } else {
- return false;
- }
- }
- return true;
+ while (offset < s->size) {
+ switch (s->ptr[offset]) {
+ case '\n':
+ return true; // hit end of line having seen only spaces
+ case ' ':
+ offset++;
+ break; // a space decides nothing yet: leave the switch and keep scanning
+ default:
+ return false; // any other byte means the line has content
+ }
+ }
+ return true; // ran off the end of the buffer: nothing but spaces
}
static inline bool can_contain(int parent_type, int child_type)
{
- return ( parent_type == document ||
- parent_type == block_quote ||
- parent_type == list_item ||
- (parent_type == list && child_type == list_item) );
+ return ( parent_type == document ||
+ parent_type == block_quote ||
+ parent_type == list_item ||
+ (parent_type == list && child_type == list_item) );
}
static inline bool accepts_lines(int block_type)
{
- return (block_type == paragraph ||
- block_type == atx_header ||
- block_type == indented_code ||
- block_type == fenced_code);
+ return (block_type == paragraph ||
+ block_type == atx_header ||
+ block_type == indented_code ||
+ block_type == fenced_code);
}
-static int add_line(block* block, bstring ln, int offset)
+static void add_line(block* block, gh_buf *ln, int offset)
{
- bstring s = bmidstr(ln, offset, blength(ln) - offset);
- check(block->open, "attempted to add line (%s) to closed container (%d)",
- ln->data, block->tag);
- check(bformata(block->string_content, "%s", s->data) == 0,
- "could not append line to string_content");
- bdestroy(s);
- return 0;
- error:
- return -1;
+ assert(block->open);
+ gh_buf_put(&block->string_content, ln->ptr + offset, ln->size - offset);
}
-static int remove_trailing_blank_lines(bstring ln)
+static void remove_trailing_blank_lines(gh_buf *ln)
{
- bstring tofind = bfromcstr(" \t\r\n");
- int pos;
- // find last nonspace:
- pos = bninchrr(ln, blength(ln) - 1, tofind);
- if (pos == BSTR_ERR) { // all spaces
- bassigncstr(ln, "");
- } else {
- // find next newline after it
- pos = bstrchrp(ln, '\n', pos);
- if (pos != BSTR_ERR) {
- check(bdelete(ln, pos, blength(ln) - pos) != BSTR_ERR,
- "failed to delete trailing blank lines");
- }
- }
- bdestroy(tofind);
- return 0;
- error:
- return -1;
+ int i;
+
+ for (i = ln->size - 1; i >= 0; --i) { // walk back to the last non-whitespace byte
+ char c = ln->ptr[i];
+
+ if (c != ' ' && c != '\t' && c != '\r' && c != '\n')
+ break;
+ }
+
+ if (i < 0) { // buffer holds only whitespace: drop all of it
+ gh_buf_clear(ln);
+ return;
+ }
+
+ i = gh_buf_strchr(ln, '\n', i); // first newline after the last real content
+ if (i >= 0)
+ gh_buf_truncate(ln, i); // cut AT the newline (as the old bdelete did); NOTE(review): assumes truncate keeps the first i bytes
}
// Check to see if a block ends with a blank line, descending
// if needed into lists and sublists.
static bool ends_with_blank_line(block* block)
{
- if (block->last_line_blank) {
- return true;
- }
- if ((block->tag == list || block->tag == list_item) && block->last_child) {
- return ends_with_blank_line(block->last_child);
- } else {
- return false;
- }
+ if (block->last_line_blank) {
+ return true;
+ }
+ if ((block->tag == list || block->tag == list_item) && block->last_child) {
+ return ends_with_blank_line(block->last_child);
+ } else {
+ return false;
+ }
}
// Break out of all containing lists
static int break_out_of_lists(block ** bptr, int line_number)
{
- block * container = *bptr;
- block * b = container->top;
- // find first containing list:
- while (b && b->tag != list) {
- b = b->last_child;
- }
- if (b) {
- while (container && container != b) {
- finalize(container, line_number);
- container = container->parent;
- }
- finalize(b, line_number);
- *bptr = b->parent;
- }
- return 0;
+ block * container = *bptr;
+ block * b = container->top;
+ // find first containing list:
+ while (b && b->tag != list) {
+ b = b->last_child;
+ }
+ if (b) {
+ while (container && container != b) {
+ finalize(container, line_number);
+ container = container->parent;
+ }
+ finalize(b, line_number);
+ *bptr = b->parent;
+ }
+ return 0;
}
-extern int finalize(block* b, int line_number)
+extern void finalize(block* b, int line_number)
{
- int firstlinelen;
- int pos;
- block* item;
- block* subitem;
-
- check(b != NULL, "finalize called on null block");
- if (!b->open) {
- return 0; // don't do anything if the block is already closed
- }
- b->open = false;
- if (line_number > b->start_line) {
- b->end_line = line_number - 1;
- } else {
- b->end_line = line_number;
- }
-
- switch (b->tag) {
-
- case paragraph:
- pos = 0;
- while (bchar(b->string_content, 0) == '[' &&
- (pos = parse_reference(b->string_content,
- b->top->attributes.refmap))) {
- bdelete(b->string_content, 0, pos);
- }
- if (is_blank(b->string_content, 0)) {
- b->tag = reference_def;
- }
- break;
-
- case indented_code:
- remove_trailing_blank_lines(b->string_content);
- bformata(b->string_content, "\n");
- break;
-
- case fenced_code:
- // first line of contents becomes info
- firstlinelen = bstrchr(b->string_content, '\n');
- b->attributes.fenced_code_data.info =
- bmidstr(b->string_content, 0, firstlinelen);
- bdelete(b->string_content, 0, firstlinelen + 1); // +1 for \n
- btrimws(b->attributes.fenced_code_data.info);
- unescape(b->attributes.fenced_code_data.info);
- break;
-
- case list: // determine tight/loose status
- b->attributes.list_data.tight = true; // tight by default
- item = b->children;
-
- while (item) {
- // check for non-final non-empty list item ending with blank line:
- if (item->last_line_blank && item->next) {
- b->attributes.list_data.tight = false;
- break;
- }
- // recurse into children of list item, to see if there are
- // spaces between them:
- subitem = item->children;
- while (subitem) {
- if (ends_with_blank_line(subitem) &&
- (item->next || subitem->next)) {
- b->attributes.list_data.tight = false;
- break;
- }
- subitem = subitem->next;
- }
- if (!(b->attributes.list_data.tight)) {
- break;
- }
- item = item->next;
- }
-
- break;
-
- default:
- break;
- }
-
- return 0;
- error:
- return -1;
+ int firstlinelen;
+ int pos;
+ block* item;
+ block* subitem;
+
+ if (!b->open)
+ return; // don't do anything if the block is already closed
+
+ b->open = false;
+ if (line_number > b->start_line) {
+ b->end_line = line_number - 1;
+ } else {
+ b->end_line = line_number;
+ }
+
+ switch (b->tag) {
+ case paragraph:
+ pos = 0;
+ while (gh_buf_at(&b->string_content, b->string_pos) == '[' &&
+ (pos = parse_reference(&b->string_content, b->string_pos,
+ b->top->attributes.refmap))) {
+ b->string_pos = pos;
+ }
+ if (is_blank(&b->string_content, b->string_pos)) {
+ b->tag = reference_def;
+ }
+ break;
+
+ case indented_code:
+ remove_trailing_blank_lines(&b->string_content);
+ gh_buf_putc(&b->string_content, '\n');
+ break;
+
+ case fenced_code:
+ // first line of contents becomes info
+ firstlinelen = gh_buf_strchr(&b->string_content, '\n', b->string_pos);
+ gh_buf_set(
+ &b->attributes.fenced_code_data.info,
+ b->string_content.ptr + b->string_pos,
+ firstlinelen
+ );
+
+ b->string_pos = firstlinelen + 1;
+
+ gh_buf_trim(&b->attributes.fenced_code_data.info);
+ unescape_buffer(&b->attributes.fenced_code_data.info);
+ break;
+
+ case list: // determine tight/loose status
+ b->attributes.list_data.tight = true; // tight by default
+ item = b->children;
+
+ while (item) {
+ // check for non-final non-empty list item ending with blank line:
+ if (item->last_line_blank && item->next) {
+ b->attributes.list_data.tight = false;
+ break;
+ }
+ // recurse into children of list item, to see if there are
+ // spaces between them:
+ subitem = item->children;
+ while (subitem) {
+ if (ends_with_blank_line(subitem) &&
+ (item->next || subitem->next)) {
+ b->attributes.list_data.tight = false;
+ break;
+ }
+ subitem = subitem->next;
+ }
+ if (!(b->attributes.list_data.tight)) {
+ break;
+ }
+ item = item->next;
+ }
+
+ break;
+
+ default:
+ break;
+ }
}
// Add a block as child of another. Return pointer to child.
extern block* add_child(block* parent,
- int block_type, int start_line, int start_column)
+ int block_type, int start_line, int start_column)
{
- // if 'parent' isn't the kind of block that can accept this child,
- // then back up til we hit a block that can.
- while (!can_contain(parent->tag, block_type)) {
- finalize(parent, start_line);
- parent = parent->parent;
- }
-
- check(parent != NULL, "parent container cannot accept children");
-
- block* child = make_block(block_type, start_line, start_column);
- child->parent = parent;
- child->top = parent->top;
-
- if (parent->last_child) {
- parent->last_child->next = child;
- child->prev = parent->last_child;
- } else {
- parent->children = child;
- child->prev = NULL;
- }
- parent->last_child = child;
- return child;
- error:
- return NULL;
+ assert(parent);
+
+ // if 'parent' isn't the kind of block that can accept this child,
+ // then back up til we hit a block that can.
+ while (!can_contain(parent->tag, block_type)) {
+ finalize(parent, start_line);
+ parent = parent->parent;
+ }
+
+ block* child = make_block(block_type, start_line, start_column);
+ child->parent = parent;
+ child->top = parent->top;
+
+ if (parent->last_child) {
+ parent->last_child->next = child;
+ child->prev = parent->last_child;
+ } else {
+ parent->children = child;
+ child->prev = NULL;
+ }
+ parent->last_child = child;
+ return child;
}
// Free a block list and any children.
extern void free_blocks(block* e)
{
- block * next;
- while (e != NULL) {
- next = e->next;
- free_inlines(e->inline_content);
- bdestroy(e->string_content);
- if (e->tag == fenced_code) {
- bdestroy(e->attributes.fenced_code_data.info);
- } else if (e->tag == document) {
- free_reference_map(e->attributes.refmap);
- }
- free_blocks(e->children);
- free(e);
- e = next;
- }
+ block * next;
+ while (e != NULL) {
+ next = e->next;
+ free_inlines(e->inline_content);
+ gh_buf_free(&e->string_content);
+ if (e->tag == fenced_code) {
+ gh_buf_free(&e->attributes.fenced_code_data.info);
+ } else if (e->tag == document) {
+ free_reference_map(e->attributes.refmap);
+ }
+ free_blocks(e->children);
+ free(e);
+ e = next;
+ }
}
// Walk through block and all children, recursively, parsing
// string content into inline content where appropriate.
-int process_inlines(block* cur, reference** refmap)
+void process_inlines(block* cur, reference** refmap)
{
- switch (cur->tag) {
-
- case paragraph:
- case atx_header:
- case setext_header:
- check(cur->string_content != NULL, "string_content is NULL");
- cur->inline_content = parse_inlines(cur->string_content, refmap);
- bdestroy(cur->string_content);
- cur->string_content = NULL;
- break;
-
- default:
- break;
- }
-
- block * child = cur->children;
- while (child != NULL) {
- process_inlines(child, refmap);
- child = child->next;
- }
-
- return 0;
- error:
- return -1;
+ switch (cur->tag) {
+ case paragraph:
+ case atx_header:
+ case setext_header:
+ cur->inline_content = parse_inlines(&cur->string_content, cur->string_pos, refmap);
+ // MEM
+ // gh_buf_free(&cur->string_content);
+ break;
+
+ default:
+ break;
+ }
+
+ block *child = cur->children;
+ while (child != NULL) {
+ process_inlines(child, refmap);
+ child = child->next;
+ }
}
// Attempts to parse a list item marker (bullet or enumerated).
// On success, returns length of the marker, and populates
// data with the details. On failure, returns 0.
-static int parse_list_marker(bstring ln, int pos,
- struct ListData ** dataptr)
+static int parse_list_marker(gh_buf *ln, int pos,
+ struct ListData ** dataptr)
{
- char c;
- int startpos;
- int start = 1;
- struct ListData * data;
-
- startpos = pos;
- c = bchar(ln, pos);
-
- if ((c == '*' || c == '-' || c == '+') && !scan_hrule(ln, pos)) {
- pos++;
- if (!isspace(bchar(ln, pos))) {
- return 0;
- }
- data = malloc(sizeof(struct ListData));
- data->marker_offset = 0; // will be adjusted later
- data->list_type = bullet;
- data->bullet_char = c;
- data->start = 1;
- data->delimiter = period;
- data->tight = false;
-
- } else if (isdigit(c)) {
-
- pos++;
- while (isdigit(bchar(ln, pos))) {
- pos++;
- }
-
- if (!sscanf((char *) ln->data + startpos, "%d", &start)) {
- log_err("sscanf failed");
- return 0;
- }
-
- c = bchar(ln, pos);
- if (c == '.' || c == ')') {
- pos++;
- if (!isspace(bchar(ln, pos))) {
- return 0;
- }
- data = malloc(sizeof(struct ListData));
- data->marker_offset = 0; // will be adjusted later
- data->list_type = ordered;
- data->bullet_char = 0;
- data->start = start;
- data->delimiter = (c == '.' ? period : parens);
- data->tight = false;
- } else {
- return 0;
- }
-
- } else {
- return 0;
- }
-
- *dataptr = data;
- return (pos - startpos);
+ char c;
+ int startpos;
+ struct ListData * data;
+
+ startpos = pos;
+ c = gh_buf_at(ln, pos);
+
+ if ((c == '*' || c == '-' || c == '+') && !scan_hrule(ln, pos)) {
+ pos++;
+ if (!isspace(gh_buf_at(ln, pos))) {
+ return 0;
+ }
+ data = malloc(sizeof(struct ListData));
+ data->marker_offset = 0; // will be adjusted later
+ data->list_type = bullet;
+ data->bullet_char = c;
+ data->start = 1;
+ data->delimiter = period;
+ data->tight = false;
+
+ } else if (isdigit(c)) {
+ int start = 0;
+
+ do {
+ start = (10 * start) + (gh_buf_at(ln, pos) - '0');
+ pos++;
+ } while (isdigit(gh_buf_at(ln, pos)));
+
+ c = gh_buf_at(ln, pos);
+ if (c == '.' || c == ')') {
+ pos++;
+ if (!isspace(gh_buf_at(ln, pos))) {
+ return 0;
+ }
+ data = malloc(sizeof(struct ListData));
+ data->marker_offset = 0; // will be adjusted later
+ data->list_type = ordered;
+ data->bullet_char = 0;
+ data->start = start;
+ data->delimiter = (c == '.' ? period : parens);
+ data->tight = false;
+ } else {
+ return 0;
+ }
+
+ } else {
+ return 0;
+ }
+
+ *dataptr = data;
+ return (pos - startpos);
}
// Return 1 if list item belongs in list, else 0.
static int lists_match(struct ListData list_data,
- struct ListData item_data)
+ struct ListData item_data)
+{
+ return (list_data.list_type == item_data.list_type &&
+ list_data.delimiter == item_data.delimiter &&
+ // list_data.marker_offset == item_data.marker_offset &&
+ list_data.bullet_char == item_data.bullet_char);
+}
+
+static void expand_tabs(gh_buf *ob, const char *line, size_t size)
+{
+ size_t i = 0, tab = 0;
+
+ while (i < size) {
+ size_t org = i;
+
+ while (i < size && line[i] != '\t') {
+ i++; tab++;
+ }
+
+ if (i > org)
+ gh_buf_put(ob, line + org, i - org);
+
+ if (i >= size)
+ break;
+
+ do {
+ gh_buf_putc(ob, ' '); tab++;
+ } while (tab % 4);
+
+ i++;
+ }
+}
+
+extern block *stmd_parse_document(const char *buffer, size_t len)
{
- return (list_data.list_type == item_data.list_type &&
- list_data.delimiter == item_data.delimiter &&
- // list_data.marker_offset == item_data.marker_offset &&
- list_data.bullet_char == item_data.bullet_char);
+ gh_buf line = GH_BUF_INIT;
+
+ block *document = make_document();
+ int linenum = 1;
+ const char *end = buffer + len;
+
+ while (buffer < end) {
+ const char *eol = memchr(buffer, '\n', end - buffer);
+
+ if (!eol) {
+ expand_tabs(&line, buffer, end - buffer);
+ buffer = end;
+ } else {
+ expand_tabs(&line, buffer, (eol - buffer) + 1);
+ buffer += (eol - buffer) + 1;
+ }
+
+ incorporate_line(&line, linenum, &document);
+ gh_buf_clear(&line);
+ linenum++;
+ }
+
+ gh_buf_free(&line);
+
+ while (document != document->top) {
+ finalize(document, linenum);
+ document = document->parent;
+ }
+
+ finalize(document, linenum);
+ process_inlines(document, document->attributes.refmap);
+
+ return document;
}
// Process one line at a time, modifying a block.
// Returns 0 if successful. curptr is changed to point to
// the currently open block.
-extern int incorporate_line(bstring ln, int line_number, block** curptr)
+extern void incorporate_line(gh_buf *ln, int line_number, block** curptr)
{
- block* last_matched_container;
- int offset = 0;
- int matched = 0;
- int lev = 0;
- int i;
- struct ListData * data = NULL;
- bool all_matched = true;
- block* container;
- block* cur = *curptr;
- bool blank = false;
- int first_nonspace;
- int indent;
-
- // detab input line
- check(bdetab(ln, 1) != BSTR_ERR,
- "invalid UTF-8 sequence in line %d\n", line_number);
-
- // container starts at the document root.
- container = cur->top;
-
- // for each containing block, try to parse the associated line start.
- // bail out on failure: container will point to the last matching block.
-
- while (container->last_child && container->last_child->open) {
- container = container->last_child;
-
- first_nonspace = offset;
- while (bchar(ln, first_nonspace) == ' ') {
- first_nonspace++;
- }
-
- indent = first_nonspace - offset;
- blank = bchar(ln, first_nonspace) == '\n';
-
- if (container->tag == block_quote) {
-
- matched = indent <= 3 && bchar(ln, first_nonspace) == '>';
- if (matched) {
- offset = first_nonspace + 1;
- if (bchar(ln, offset) == ' ') {
- offset++;
- }
- } else {
- all_matched = false;
- }
-
- } else if (container->tag == list_item) {
-
- if (indent >= container->attributes.list_data.marker_offset +
- container->attributes.list_data.padding) {
- offset += container->attributes.list_data.marker_offset +
- container->attributes.list_data.padding;
- } else if (blank) {
- offset = first_nonspace;
- } else {
- all_matched = false;
- }
-
- } else if (container->tag == indented_code) {
-
- if (indent >= CODE_INDENT) {
- offset += CODE_INDENT;
- } else if (blank) {
- offset = first_nonspace;
- } else {
- all_matched = false;
- }
-
- } else if (container->tag == atx_header ||
- container->tag == setext_header) {
-
- // a header can never contain more than one line
- all_matched = false;
-
- } else if (container->tag == fenced_code) {
-
- // skip optional spaces of fence offset
- i = container->attributes.fenced_code_data.fence_offset;
- while (i > 0 && bchar(ln, offset) == ' ') {
- offset++;
- i--;
- }
-
- } else if (container->tag == html_block) {
-
- if (blank) {
- all_matched = false;
- }
-
- } else if (container->tag == paragraph) {
-
- if (blank) {
- container->last_line_blank =true;
- all_matched = false;
- }
-
- }
-
- if (!all_matched) {
- container = container->parent; // back up to last matching block
- break;
- }
- }
-
- last_matched_container = container;
-
- // check to see if we've hit 2nd blank line, break out of list:
- if (blank && container->last_line_blank) {
- break_out_of_lists(&container, line_number);
- }
-
- // unless last matched container is code block, try new container starts:
- while (container->tag != fenced_code && container->tag != indented_code &&
- container->tag != html_block) {
-
- first_nonspace = offset;
- while (bchar(ln, first_nonspace) == ' ') {
- first_nonspace++;
- }
-
- indent = first_nonspace - offset;
- blank = bchar(ln, first_nonspace) == '\n';
-
- if (indent >= CODE_INDENT) {
-
- if (cur->tag != paragraph && !blank) {
- offset += CODE_INDENT;
- container = add_child(container, indented_code, line_number, offset + 1);
- } else { // indent > 4 in lazy line
- break;
- }
-
- } else if (bchar(ln, first_nonspace) == '>') {
-
- offset = first_nonspace + 1;
- // optional following character
- if (bchar(ln, offset) == ' ') {
- offset++;
- }
- container = add_child(container, block_quote, line_number, offset + 1);
-
- } else if ((matched = scan_atx_header_start(ln, first_nonspace))) {
-
- offset = first_nonspace + matched;
- container = add_child(container, atx_header, line_number, offset + 1);
- int hashpos = bstrchrp(ln, '#', first_nonspace);
- check(hashpos != BSTR_ERR, "no # found in atx header start");
- int level = 0;
- while (bchar(ln, hashpos) == '#') {
- level++;
- hashpos++;
- }
- container->attributes.header_level = level;
-
- } else if ((matched = scan_open_code_fence(ln, first_nonspace))) {
-
- container = add_child(container, fenced_code, line_number,
- first_nonspace + 1);
- container->attributes.fenced_code_data.fence_char = bchar(ln,
- first_nonspace);
- container->attributes.fenced_code_data.fence_length = matched;
- container->attributes.fenced_code_data.fence_offset =
- first_nonspace - offset;
- offset = first_nonspace + matched;
-
- } else if ((matched = scan_html_block_tag(ln, first_nonspace))) {
-
- container = add_child(container, html_block, line_number,
- first_nonspace + 1);
- // note, we don't adjust offset because the tag is part of the text
-
- } else if (container->tag == paragraph &&
- (lev = scan_setext_header_line(ln, first_nonspace)) &&
- // check that there is only one line in the paragraph:
- bstrrchrp(container->string_content, '\n',
- blength(container->string_content) - 2) == BSTR_ERR) {
-
- container->tag = setext_header;
- container->attributes.header_level = lev;
- offset = blength(ln) - 1;
-
- } else if (!(container->tag == paragraph && !all_matched) &&
- (matched = scan_hrule(ln, first_nonspace))) {
-
- // it's only now that we know the line is not part of a setext header:
- container = add_child(container, hrule, line_number, first_nonspace + 1);
- finalize(container, line_number);
- container = container->parent;
- offset = blength(ln) - 1;
-
- } else if ((matched = parse_list_marker(ln, first_nonspace, &data))) {
-
- // compute padding:
- offset = first_nonspace + matched;
- i = 0;
- while (i <= 5 && bchar(ln, offset + i) == ' ') {
- i++;
- }
- // i = number of spaces after marker, up to 5
- if (i >= 5 || i < 1 || bchar(ln, offset) == '\n') {
- data->padding = matched + 1;
- if (i > 0) {
- offset += 1;
- }
- } else {
- data->padding = matched + i;
- offset += i;
- }
-
- // check container; if it's a list, see if this list item
- // can continue the list; otherwise, create a list container.
-
- data->marker_offset = indent;
-
- if (container->tag != list ||
- !lists_match(container->attributes.list_data, *data)) {
- container = add_child(container, list, line_number,
- first_nonspace + 1);
- container->attributes.list_data = *data;
- }
-
- // add the list item
- container = add_child(container, list_item, line_number,
- first_nonspace + 1);
- container->attributes.list_data = *data;
- free(data);
-
- } else {
- break;
- }
-
- if (accepts_lines(container->tag)) {
- // if it's a line container, it can't contain other containers
- break;
- }
- }
-
- // what remains at offset is a text line. add the text to the
- // appropriate container.
-
- first_nonspace = offset;
- while (bchar(ln, first_nonspace) == ' ') {
- first_nonspace++;
- }
-
- indent = first_nonspace - offset;
- blank = bchar(ln, first_nonspace) == '\n';
-
- // block quote lines are never blank as they start with >
- // and we don't count blanks in fenced code for purposes of tight/loose
- // lists or breaking out of lists. we also don't set last_line_blank
- // on an empty list item.
- container->last_line_blank = (blank &&
- container->tag != block_quote &&
- container->tag != fenced_code &&
- !(container->tag == list_item &&
- container->children == NULL &&
- container->start_line == line_number));
-
- block *cont = container;
- while (cont->parent) {
- cont->parent->last_line_blank = false;
- cont = cont->parent;
- }
-
- if (cur != last_matched_container &&
- container == last_matched_container &&
- !blank &&
- cur->tag == paragraph &&
- blength(cur->string_content) > 0) {
-
- check(add_line(cur, ln, offset) == 0, "could not add line");
-
- } else { // not a lazy continuation
-
- // finalize any blocks that were not matched and set cur to container:
- while (cur != last_matched_container) {
-
- finalize(cur, line_number);
- cur = cur->parent;
- check(cur != NULL, "cur is NULL, last_matched_container->tag = %d",
- last_matched_container->tag);
-
- }
-
- if (container->tag == indented_code) {
-
- check(add_line(container, ln, offset) == 0, "could not add line");
-
- } else if (container->tag == fenced_code) {
-
- matched = (indent <= 3
- && bchar(ln, first_nonspace) == container->attributes.fenced_code_data.fence_char)
- && scan_close_code_fence(ln, first_nonspace,
- container->attributes.fenced_code_data.fence_length);
- if (matched) {
- // if closing fence, don't add line to container; instead, close it:
- finalize(container, line_number);
- container = container->parent; // back up to parent
- } else {
- check(add_line(container, ln, offset) == 0, "could not add line");
- }
-
- } else if (container->tag == html_block) {
-
- check(add_line(container, ln, offset) == 0, "could not add line");
-
- } else if (blank) {
-
- // ??? do nothing
-
- } else if (container->tag == atx_header) {
-
- // chop off trailing ###s...use a scanner?
- brtrimws(ln);
- int p = blength(ln) - 1;
- int numhashes = 0;
- // if string ends in #s, remove these:
- while (bchar(ln, p) == '#') {
- p--;
- numhashes++;
- }
- if (bchar(ln, p) == '\\') {
- // the last # was escaped, so we include it.
- p++;
- numhashes--;
- }
- check(bdelete(ln, p + 1, numhashes) != BSTR_ERR,
- "could not delete final hashes");
- check(add_line(container, ln, first_nonspace) == 0, "could not add line");
- finalize(container, line_number);
- container = container->parent;
-
- } else if (accepts_lines(container->tag)) {
-
- check(add_line(container, ln, first_nonspace) == 0, "could not add line");
+ block* last_matched_container;
+ int offset = 0;
+ int matched = 0;
+ int lev = 0;
+ int i;
+ struct ListData * data = NULL;
+ bool all_matched = true;
+ block* container;
+ block* cur = *curptr;
+ bool blank = false;
+ int first_nonspace;
+ int indent;
+
+ // container starts at the document root.
+ container = cur->top;
+
+ // for each containing block, try to parse the associated line start.
+ // bail out on failure: container will point to the last matching block.
+
+ while (container->last_child && container->last_child->open) {
+ container = container->last_child;
+
+ first_nonspace = offset;
+ while (gh_buf_at(ln, first_nonspace) == ' ') {
+ first_nonspace++;
+ }
+
+ indent = first_nonspace - offset;
+ blank = gh_buf_at(ln, first_nonspace) == '\n';
+
+ if (container->tag == block_quote) {
+
+ matched = indent <= 3 && gh_buf_at(ln, first_nonspace) == '>';
+ if (matched) {
+ offset = first_nonspace + 1;
+ if (gh_buf_at(ln, offset) == ' ') {
+ offset++;
+ }
+ } else {
+ all_matched = false;
+ }
+
+ } else if (container->tag == list_item) {
+
+ if (indent >= container->attributes.list_data.marker_offset +
+ container->attributes.list_data.padding) {
+ offset += container->attributes.list_data.marker_offset +
+ container->attributes.list_data.padding;
+ } else if (blank) {
+ offset = first_nonspace;
+ } else {
+ all_matched = false;
+ }
+
+ } else if (container->tag == indented_code) {
+
+ if (indent >= CODE_INDENT) {
+ offset += CODE_INDENT;
+ } else if (blank) {
+ offset = first_nonspace;
+ } else {
+ all_matched = false;
+ }
+
+ } else if (container->tag == atx_header ||
+ container->tag == setext_header) {
+
+ // a header can never contain more than one line
+ all_matched = false;
+
+ } else if (container->tag == fenced_code) {
+
+ // skip optional spaces of fence offset
+ i = container->attributes.fenced_code_data.fence_offset;
+ while (i > 0 && gh_buf_at(ln, offset) == ' ') {
+ offset++;
+ i--;
+ }
+
+ } else if (container->tag == html_block) {
+
+ if (blank) {
+ all_matched = false;
+ }
+
+ } else if (container->tag == paragraph) {
+
+ if (blank) {
+ container->last_line_blank = true;
+ all_matched = false;
+ }
+
+ }
+
+ if (!all_matched) {
+ container = container->parent; // back up to last matching block
+ break;
+ }
+ }
+
+ last_matched_container = container;
+
+ // check to see if we've hit 2nd blank line, break out of list:
+ if (blank && container->last_line_blank) {
+ break_out_of_lists(&container, line_number);
+ }
+
+ // unless last matched container is code block, try new container starts:
+ while (container->tag != fenced_code && container->tag != indented_code &&
+ container->tag != html_block) {
+
+ first_nonspace = offset;
+ while (gh_buf_at(ln, first_nonspace) == ' ') {
+ first_nonspace++;
+ }
+
+ indent = first_nonspace - offset;
+ blank = gh_buf_at(ln, first_nonspace) == '\n';
+
+ if (indent >= CODE_INDENT) {
+
+ if (cur->tag != paragraph && !blank) {
+ offset += CODE_INDENT;
+ container = add_child(container, indented_code, line_number, offset + 1);
+ } else { // indent > 4 in lazy line
+ break;
+ }
+
+ } else if (gh_buf_at(ln, first_nonspace) == '>') {
+
+ offset = first_nonspace + 1;
+ // optional following character
+ if (gh_buf_at(ln, offset) == ' ') {
+ offset++;
+ }
+ container = add_child(container, block_quote, line_number, offset + 1);
+
+ } else if ((matched = scan_atx_header_start(ln, first_nonspace))) {
+
+ offset = first_nonspace + matched;
+ container = add_child(container, atx_header, line_number, offset + 1);
+
+ int hashpos = gh_buf_strchr(ln, '#', first_nonspace);
+ assert(hashpos >= 0);
+
+ int level = 0;
+ while (gh_buf_at(ln, hashpos) == '#') {
+ level++;
+ hashpos++;
+ }
+ container->attributes.header_level = level;
+
+ } else if ((matched = scan_open_code_fence(ln, first_nonspace))) {
+
+ container = add_child(container, fenced_code, line_number,
+ first_nonspace + 1);
+ container->attributes.fenced_code_data.fence_char = gh_buf_at(ln,
+ first_nonspace);
+ container->attributes.fenced_code_data.fence_length = matched;
+ container->attributes.fenced_code_data.fence_offset =
+ first_nonspace - offset;
+ offset = first_nonspace + matched;
+
+ } else if ((matched = scan_html_block_tag(ln, first_nonspace))) {
+
+ container = add_child(container, html_block, line_number,
+ first_nonspace + 1);
+ // note, we don't adjust offset because the tag is part of the text
+
+ } else if (container->tag == paragraph &&
+ (lev = scan_setext_header_line(ln, first_nonspace)) &&
+ // check that there is only one line in the paragraph:
+ gh_buf_strrchr(&container->string_content, '\n',
+ gh_buf_len(&container->string_content) - 2) < 0) {
+
+ container->tag = setext_header;
+ container->attributes.header_level = lev;
+ offset = gh_buf_len(ln) - 1;
+
+ } else if (!(container->tag == paragraph && !all_matched) &&
+ (matched = scan_hrule(ln, first_nonspace))) {
+
+ // it's only now that we know the line is not part of a setext header:
+ container = add_child(container, hrule, line_number, first_nonspace + 1);
+ finalize(container, line_number);
+ container = container->parent;
+ offset = gh_buf_len(ln) - 1;
+
+ } else if ((matched = parse_list_marker(ln, first_nonspace, &data))) {
+
+ // compute padding:
+ offset = first_nonspace + matched;
+ i = 0;
+ while (i <= 5 && gh_buf_at(ln, offset + i) == ' ') {
+ i++;
+ }
+ // i = number of spaces after marker, up to 5
+ if (i >= 5 || i < 1 || gh_buf_at(ln, offset) == '\n') {
+ data->padding = matched + 1;
+ if (i > 0) {
+ offset += 1;
+ }
+ } else {
+ data->padding = matched + i;
+ offset += i;
+ }
+
+ // check container; if it's a list, see if this list item
+ // can continue the list; otherwise, create a list container.
+
+ data->marker_offset = indent;
+
+ if (container->tag != list ||
+ !lists_match(container->attributes.list_data, *data)) {
+ container = add_child(container, list, line_number,
+ first_nonspace + 1);
+ container->attributes.list_data = *data;
+ }
+
+ // add the list item
+ container = add_child(container, list_item, line_number,
+ first_nonspace + 1);
+ container->attributes.list_data = *data;
+ free(data);
+
+ } else {
+ break;
+ }
+
+ if (accepts_lines(container->tag)) {
+ // if it's a line container, it can't contain other containers
+ break;
+ }
+ }
+
+ // what remains at offset is a text line. add the text to the
+ // appropriate container.
+
+ first_nonspace = offset;
+ while (gh_buf_at(ln, first_nonspace) == ' ') {
+ first_nonspace++;
+ }
+
+ indent = first_nonspace - offset;
+ blank = gh_buf_at(ln, first_nonspace) == '\n';
+
+ // block quote lines are never blank as they start with >
+ // and we don't count blanks in fenced code for purposes of tight/loose
+ // lists or breaking out of lists. we also don't set last_line_blank
+ // on an empty list item.
+ container->last_line_blank = (blank &&
+ container->tag != block_quote &&
+ container->tag != fenced_code &&
+ !(container->tag == list_item &&
+ container->children == NULL &&
+ container->start_line == line_number));
+
+ block *cont = container;
+ while (cont->parent) {
+ cont->parent->last_line_blank = false;
+ cont = cont->parent;
+ }
+
+ if (cur != last_matched_container &&
+ container == last_matched_container &&
+ !blank &&
+ cur->tag == paragraph &&
+ gh_buf_len(&cur->string_content) > 0) {
+
+ add_line(cur, ln, offset);
+
+ } else { // not a lazy continuation
+
+ // finalize any blocks that were not matched and set cur to container:
+ while (cur != last_matched_container) {
+
+ finalize(cur, line_number);
+ cur = cur->parent;
+ assert(cur != NULL);
+ }
+
+ if (container->tag == indented_code) {
+
+ add_line(container, ln, offset);
+
+ } else if (container->tag == fenced_code) {
+
+ matched = (indent <= 3
+ && gh_buf_at(ln, first_nonspace) == container->attributes.fenced_code_data.fence_char)
+ && scan_close_code_fence(ln, first_nonspace,
+ container->attributes.fenced_code_data.fence_length);
+ if (matched) {
+ // if closing fence, don't add line to container; instead, close it:
+ finalize(container, line_number);
+ container = container->parent; // back up to parent
+ } else {
+ add_line(container, ln, offset);
+ }
+
+ } else if (container->tag == html_block) {
+
+ add_line(container, ln, offset);
+
+ } else if (blank) {
+
+ // ??? do nothing
+
+ } else if (container->tag == atx_header) {
+ // chop off trailing ###s...use a scanner?
+ gh_buf_trim(ln);
+ int p = gh_buf_len(ln) - 1;
+
+ // if string ends in #s, remove these:
+ while (gh_buf_at(ln, p) == '#') {
+ p--;
+ }
+ if (gh_buf_at(ln, p) == '\\') {
+ // the last # was escaped, so we include it.
+ p++;
+ }
+
+ gh_buf_truncate(ln, p + 1);
+ add_line(container, ln, first_nonspace);
+ finalize(container, line_number);
+ container = container->parent;
+
+ } else if (accepts_lines(container->tag)) {
+
+ add_line(container, ln, first_nonspace);
+
+ } else if (container->tag != hrule && container->tag != setext_header) {
+
+ // create paragraph container for line
+ container = add_child(container, paragraph, line_number, first_nonspace + 1);
+ add_line(container, ln, first_nonspace);
- } else if (container->tag != hrule && container->tag != setext_header) {
-
- // create paragraph container for line
- container = add_child(container, paragraph, line_number, first_nonspace + 1);
- check(add_line(container, ln, first_nonspace) == 0, "could not add line");
-
- } else {
+ } else {
+ assert(false);
+ }
- log_warn("Line %d with container type %d did not match any condition:\n\"%s\"",
- line_number, container->tag, ln->data);
-
- }
- *curptr = container;
- }
-
- return 0;
- error:
- return -1;
+ *curptr = container;
+ }
}
diff --git a/src/bstrlib.c b/src/bstrlib.c
deleted file mode 100644
index 1b19dbe..0000000
--- a/src/bstrlib.c
+++ /dev/null
@@ -1,2979 +0,0 @@
-/*
- * This source file is part of the bstring string library. This code was
- * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause
- * BSD open source license or GPL v2.0. Refer to the accompanying documentation
- * for details on usage and license.
- */
-
-/*
- * bstrlib.c
- *
- * This file is the core module for implementing the bstring functions.
- */
-
-#if defined (_MSC_VER)
-/* These warnings from MSVC++ are totally pointless. */
-# define _CRT_SECURE_NO_WARNINGS
-#endif
-
-#include <stdio.h>
-#include <stddef.h>
-#include <stdarg.h>
-#include <stdlib.h>
-#include <string.h>
-#include <ctype.h>
-#include "bstrlib.h"
-
-/* Optionally include a mechanism for debugging memory */
-
-#if defined(MEMORY_DEBUG) || defined(BSTRLIB_MEMORY_DEBUG)
-#include "memdbg.h"
-#endif
-
-#ifndef bstr__alloc /* allocation hook: malloc unless already defined */
-#define bstr__alloc(x) malloc (x)
-#endif
-
-#ifndef bstr__free /* release hook: free unless already defined */
-#define bstr__free(p) free (p)
-#endif
-
-#ifndef bstr__realloc /* resize hook: realloc unless already defined */
-#define bstr__realloc(p,x) realloc ((p), (x))
-#endif
-
-#ifndef bstr__memcpy /* the remaining hooks default to the <string.h> routine of the same name */
-#define bstr__memcpy(d,s,l) memcpy ((d), (s), (l))
-#endif
-
-#ifndef bstr__memmove
-#define bstr__memmove(d,s,l) memmove ((d), (s), (l))
-#endif
-
-#ifndef bstr__memset
-#define bstr__memset(d,c,l) memset ((d), (c), (l))
-#endif
-
-#ifndef bstr__memcmp
-#define bstr__memcmp(d,c,l) memcmp ((d), (c), (l))
-#endif
-
-#ifndef bstr__memchr
-#define bstr__memchr(s,c,l) memchr ((s), (c), (l))
-#endif
-
-/* Just a length safe wrapper for memmove: nothing is copied unless the
-   length L is greater than zero. The expansion is a bare { } block (not a
-   do { } while (0) wrapper) and L appears twice in it, so L should be free
-   of side effects. */
-
-#define bBlockCopy(D,S,L) { if ((L) > 0) bstr__memmove ((D),(S),(L)); }
-
-/* Compute the snapped size for a given requested size. By snapping to powers
- of 2 like this, repeated reallocations are avoided.
-
- Requests below 8 are raised to 8. For anything else, the shift/or cascade
- sets every bit below the highest set bit of i and one is then added, which
- yields the least power of two strictly greater than i. If that value,
- converted back to int, is smaller than i, the request is returned
- unchanged instead. */
-static int snapUpSize (int i) {
- if (i < 8) {
- i = 8;
- } else {
- unsigned int j;
- j = (unsigned int) i;
-
- j |= (j >> 1);
- j |= (j >> 2);
- j |= (j >> 4);
- j |= (j >> 8); /* Ok, since int >= 16 bits */
-#if (UINT_MAX != 0xffff)
- j |= (j >> 16); /* For 32 bit int systems */
-#if (UINT_MAX > 0xffffffffUL)
- j |= (j >> 32); /* For 64 bit int systems */
-#endif
-#endif
- /* Least power of two greater than i */
- j++;
- if ((int) j >= i) i = (int) j; /* keep i as is when the snapped value is not usable */
- }
- return i;
-}
-
-/* int balloc (bstring b, int olen)
- *
- * Increase the size of the memory backing the bstring b to at least olen.
- *
- * Returns BSTR_ERR when b fails the validity checks at the top, when olen
- * is not positive, or when every allocation attempt fails; BSTR_OK
- * otherwise. No allocation happens when olen is smaller than b->mlen, or
- * when the snapped size (snapUpSize (olen)) does not exceed b->mlen. After
- * a successful growth b->data[b->slen] is set to '\0'.
- */
-int balloc (bstring b, int olen) {
- int len;
- if (b == NULL || b->data == NULL || b->slen < 0 || b->mlen <= 0 ||
- b->mlen < b->slen || olen <= 0) {
- return BSTR_ERR;
- }
-
- if (olen >= b->mlen) {
- unsigned char * x;
-
- if ((len = snapUpSize (olen)) <= b->mlen) return BSTR_OK;
-
- /* Assume probability of a non-moving realloc is 0.125 */
- if (7 * b->mlen < 8 * b->slen) {
-
- /* If slen is close to mlen in size then use realloc to reduce
- memory fragmentation */
-
- reallocStrategy:;
-
- x = (unsigned char *) bstr__realloc (b->data, (size_t) len);
- if (x == NULL) {
-
- /* Since we failed, try allocating the tightest possible
- allocation (exactly olen bytes; len is updated to match) */
-
- if (NULL == (x = (unsigned char *) bstr__realloc (b->data, (size_t) (len = olen)))) {
- return BSTR_ERR;
- }
- }
- } else {
-
- /* If slen is not close to mlen then avoid the penalty of copying
- the extra bytes that are allocated, but not considered part of
- the string */
-
- if (NULL == (x = (unsigned char *) bstr__alloc ((size_t) len))) {
-
- /* Perhaps there is no available memory for the two
- allocations to be in memory at once */
-
- goto reallocStrategy;
-
- } else {
- if (b->slen) bstr__memcpy ((char *) x, (char *) b->data, (size_t) b->slen);
- bstr__free (b->data);
- }
- }
- b->data = x;
- b->mlen = len; /* either the snapped size or, after the fallback, olen */
- b->data[b->slen] = (unsigned char) '\0';
- }
-
- return BSTR_OK;
-}
-
-/* int ballocmin (bstring b, int len)
- *
- * Resize the memory backing the bstring b to exactly len bytes, or to
- * b->slen+1 bytes when len is smaller than that. Returns BSTR_ERR for an
- * invalid b, a non-positive len or a failed reallocation, and BSTR_OK
- * otherwise (including when the buffer already has the wanted size).
- * Repeated use of this function can degrade performance.
- */
-int ballocmin (bstring b, int len) {
-    int wanted;
-    unsigned char * grown;
-
-    if (b == NULL || b->data == NULL) return BSTR_ERR;
-    if ((b->slen+1) < 0 || b->mlen <= 0 || b->mlen < b->slen) return BSTR_ERR;
-    if (len <= 0) return BSTR_ERR;
-
-    /* Never shrink below the string plus its terminator. */
-    wanted = (len < b->slen + 1) ? b->slen + 1 : len;
-    if (wanted == b->mlen) return BSTR_OK;
-
-    grown = (unsigned char *) bstr__realloc (b->data, (size_t) wanted);
-    if (grown == NULL) return BSTR_ERR;
-
-    grown[b->slen] = (unsigned char) '\0';
-    b->data = grown;
-    b->mlen = wanted;
-    return BSTR_OK;
-}
-
-/* bstring bfromcstr (const char * str)
- *
- * Build a new bstring holding a copy of the '\0' terminated buffer str.
- * NULL is returned when str is NULL, when the snapped capacity does not
- * exceed the string length, or when either allocation fails.
- */
-bstring bfromcstr (const char * str) {
-    size_t n;
-    int cap;
-    bstring out;
-
-    if (NULL == str) return NULL;
-
-    n = (strlen) (str);
-    cap = snapUpSize ((int) (n + (2 - (n != 0))));
-    if (cap <= (int) n) return NULL;
-
-    out = (bstring) bstr__alloc (sizeof (struct tagbstring));
-    if (out == NULL) return NULL;
-
-    out->slen = (int) n;
-    out->mlen = cap;
-    out->data = (unsigned char *) bstr__alloc (cap);
-    if (out->data == NULL) {
-        bstr__free (out);
-        return NULL;
-    }
-
-    /* n+1 so that the terminator of str is copied as well */
-    bstr__memcpy (out->data, str, n+1);
-    return out;
-}
-
-/* bstring bfromcstralloc (int mlen, const char * str)
- *
- * Build a new bstring holding a copy of the '\0' terminated buffer str.
- * The capacity is the snapped size for the string, raised to mlen when mlen
- * is larger. NULL is returned when str is NULL, when the snapped capacity
- * does not exceed the string length, or when either allocation fails.
- */
-bstring bfromcstralloc (int mlen, const char * str) {
-    size_t n;
-    int cap;
-    bstring out;
-
-    if (NULL == str) return NULL;
-
-    n = (strlen) (str);
-    cap = snapUpSize ((int) (n + (2 - (n != 0))));
-    if (cap <= (int) n) return NULL;
-
-    out = (bstring) bstr__alloc (sizeof (struct tagbstring));
-    if (NULL == out) return NULL;
-    out->slen = (int) n;
-
-    /* honour the caller's minimum capacity */
-    if (mlen > cap) cap = mlen;
-
-    out->mlen = cap;
-    out->data = (unsigned char *) bstr__alloc (cap);
-    if (out->data == NULL) {
-        bstr__free (out);
-        return NULL;
-    }
-
-    bstr__memcpy (out->data, str, n+1);
-    return out;
-}
-
-/* bstring blk2bstr (const void * blk, int len)
- *
- * Build a new bstring from the first len bytes of the block blk and append
- * a '\0' terminator. NULL is returned when blk is NULL, len is negative or
- * an allocation fails.
- */
-bstring blk2bstr (const void * blk, int len) {
-    bstring out;
-
-    if (len < 0 || blk == NULL) return NULL;
-
-    out = (bstring) bstr__alloc (sizeof (struct tagbstring));
-    if (NULL == out) return NULL;
-
-    out->slen = len;
-    out->mlen = snapUpSize (len + (2 - (len != 0)));
-
-    out->data = (unsigned char *) bstr__alloc ((size_t) out->mlen);
-    if (NULL == out->data) {
-        bstr__free (out);
-        return NULL;
-    }
-
-    if (len > 0) {
-        bstr__memcpy (out->data, blk, (size_t) len);
-    }
-    out->data[len] = (unsigned char) '\0';
-
-    return out;
-}
-
-/* char * bstr2cstr (const_bstring b, char z)
- *
- * Allocate a '\0' terminated char * buffer holding the contents of the
- * bstring b, with every embedded '\0' replaced by the character z. NULL is
- * returned for an invalid b or a failed allocation. The result should be
- * released by the calling application with bcstrfree ().
- */
-char * bstr2cstr (const_bstring b, char z) {
-    char * out;
-    int pos, n;
-
-    if (b == NULL || b->slen < 0 || b->data == NULL) return NULL;
-
-    n = b->slen;
-    out = (char *) bstr__alloc ((size_t) (n + 1));
-    if (out == NULL) return NULL;
-
-    for (pos = 0; pos < n; pos++) {
-        if (b->data[pos] == '\0') {
-            out[pos] = z;
-        } else {
-            out[pos] = (char) (b->data[pos]);
-        }
-    }
-    out[n] = '\0';
-
-    return out;
-}
-
-/* int bcstrfree (char * s)
- *
- * Release a C-string produced by bstr2cstr (). This only wraps
- * bstr__free (), but going through it keeps higher level code independent
- * of any redefinition of bstr__alloc () / bstr__free () (for instance via
- * memdbg.h when BSTRLIB_MEMORY_DEBUG is defined). Returns BSTR_ERR when s
- * is NULL and BSTR_OK after freeing otherwise.
- */
-int bcstrfree (char * s) {
-    if (s == NULL) return BSTR_ERR;
-
-    bstr__free (s);
-    return BSTR_OK;
-}
-
-/* int bconcat (bstring b0, const_bstring b1)
- *
- * Concatenate the bstring b1 to the bstring b0.
- *
- * Returns BSTR_ERR when either argument or its data pointer is NULL, when
- * any of the lengths involved is negative, or when copying b1 or growing
- * b0 fails; BSTR_OK otherwise. When b0 has to grow and b1->data points
- * into b0's buffer, b1 is first duplicated with bstrcpy so the source
- * survives the reallocation; the duplicate is destroyed before returning.
- */
-int bconcat (bstring b0, const_bstring b1) {
-int len, d;
-bstring aux = (bstring) b1; /* source actually copied from; replaced by a private copy when b1 aliases b0 */
-
- if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL) return BSTR_ERR;
-
- d = b0->slen;
- len = b1->slen;
- if ((d | (b0->mlen - d) | len | (d + len)) < 0) return BSTR_ERR; /* sign bit set in any term => reject */
-
- if (b0->mlen <= d + len + 1) {
- ptrdiff_t pd = b1->data - b0->data;
- if (0 <= pd && pd < b0->mlen) { /* b1->data lies inside b0's buffer */
- if (NULL == (aux = bstrcpy (b1))) return BSTR_ERR;
- }
- if (balloc (b0, d + len + 1) != BSTR_OK) {
- if (aux != b1) bdestroy (aux);
- return BSTR_ERR;
- }
- }
-
- bBlockCopy (&b0->data[d], &aux->data[0], (size_t) len);
- b0->data[d + len] = (unsigned char) '\0';
- b0->slen = d + len;
- if (aux != b1) bdestroy (aux);
- return BSTR_OK;
-}
-
-/* int bconchar (bstring b, char c)
- *
- * Append the single character c to the bstring b and re-terminate it.
- * Returns BSTR_ERR when b is NULL, when its lengths are inconsistent or
- * when the buffer cannot be grown; BSTR_OK otherwise.
- */
-int bconchar (bstring b, char c) {
-    int end;
-
-    if (NULL == b) return BSTR_ERR;
-
-    end = b->slen;
-    /* a negative slen or slen > mlen sets the sign bit of the OR */
-    if ((end | (b->mlen - end)) < 0) return BSTR_ERR;
-    /* room for the new character plus the terminator */
-    if (balloc (b, end + 2) != BSTR_OK) return BSTR_ERR;
-
-    b->data[end] = (unsigned char) c;
-    b->data[end + 1] = (unsigned char) '\0';
-    b->slen++;
-    return BSTR_OK;
-}
-
-/* int bcatcstr (bstring b, const char * s)
- *
- * Concatenate a char * string to a bstring.
- *
- * Returns BSTR_ERR when b is invalid or s is NULL. Characters of s are
- * first copied straight into the unused part of b's buffer; if the '\0' of
- * s is reached there, the call finishes with BSTR_OK without any
- * allocation. Otherwise the part of s that did not fit is appended through
- * bcatblk and that call's result is returned.
- */
-int bcatcstr (bstring b, const char * s) {
-char * d;
-int i, l;
-
- if (b == NULL || b->data == NULL || b->slen < 0 || b->mlen < b->slen
- || b->mlen <= 0 || s == NULL) return BSTR_ERR;
-
- /* Optimistically concatenate directly */
- l = b->mlen - b->slen; /* bytes available after the current contents */
- d = (char *) &b->data[b->slen];
- for (i=0; i < l; i++) {
- if ((*d++ = *s++) == '\0') { /* terminator copied too: done */
- b->slen += i;
- return BSTR_OK;
- }
- }
- b->slen += i; /* buffer is full here; s now points at the uncopied tail */
-
- /* Need to explicitly resize and concatenate tail */
- return bcatblk (b, (const void *) s, (int) strlen (s));
-}
-
-/* int bcatblk (bstring b, const void * s, int len)
- *
- * Append the len bytes starting at s to the bstring b and re-terminate it.
- * Returns BSTR_ERR for an invalid b, a NULL s, a negative len, an
- * overflowing total length or a failed growth; BSTR_OK otherwise.
- */
-int bcatblk (bstring b, const void * s, int len) {
-    int total;
-
-    if (b == NULL || b->data == NULL || b->slen < 0 || b->mlen < b->slen
-        || b->mlen <= 0 || s == NULL || len < 0) return BSTR_ERR;
-
-    total = b->slen + len;
-    if (total < 0) return BSTR_ERR; /* Overflow? */
-
-    if (b->mlen <= total) {
-        /* one extra byte for the terminator */
-        if (balloc (b, total + 1) < 0) return BSTR_ERR;
-    }
-
-    bBlockCopy (&b->data[b->slen], s, (size_t) len);
-    b->data[total] = (unsigned char) '\0';
-    b->slen = total;
-    return BSTR_OK;
-}
-
-/* bstring bstrcpy (const_bstring b)
- *
- * Return a newly allocated copy of the bstring b, or NULL when b is
- * invalid or memory cannot be obtained. The data buffer is first requested
- * at the snapped size and, failing that, at exactly slen + 1 bytes.
- */
-bstring bstrcpy (const_bstring b) {
-    bstring copy;
-    int n, cap;
-
-    /* Attempted to copy an invalid string? */
-    if (b == NULL || b->slen < 0 || b->data == NULL) return NULL;
-
-    copy = (bstring) bstr__alloc (sizeof (struct tagbstring));
-    if (NULL == copy) return NULL;   /* no memory for the string header */
-
-    n = b->slen;
-    cap = snapUpSize (n + 1);
-    copy->data = (unsigned char *) bstr__alloc (cap);
-
-    if (NULL == copy->data) {
-        /* retry with the smallest buffer that still fits */
-        cap = n + 1;
-        copy->data = (unsigned char *) bstr__alloc (cap);
-        if (NULL == copy->data) {
-            /* no memory for the string data */
-            bstr__free (copy);
-            return NULL;
-        }
-    }
-
-    copy->slen = n;
-    copy->mlen = cap;
-
-    if (n != 0) {
-        bstr__memcpy ((char *) copy->data, (char *) b->data, n);
-    }
-    copy->data[copy->slen] = (unsigned char) '\0';
-
-    return copy;
-}
-
-/* int bassign (bstring a, const_bstring b)
- *
- * Overwrite the string a with the contents of string b.
- *
- * Returns BSTR_ERR when b is invalid, when a is invalid, or when a cannot
- * be grown; BSTR_OK otherwise. On success a holds b->slen bytes followed
- * by a '\0' terminator.
- */
-int bassign (bstring a, const_bstring b) {
-    if (b == NULL || b->data == NULL || b->slen < 0)
-        return BSTR_ERR;
-    if (b->slen != 0) {
-        /* Ask for slen + 1 bytes: balloc only guarantees a buffer of at
-           least the requested size, and the terminator written below sits
-           at index b->slen. (balloc also validates a.) */
-        if (balloc (a, b->slen + 1) != BSTR_OK) return BSTR_ERR;
-        bstr__memmove (a->data, b->data, b->slen);
-    } else {
-        /* Nothing to copy, so balloc is skipped and a is checked here. */
-        if (a == NULL || a->data == NULL || a->mlen < a->slen ||
-            a->slen < 0 || a->mlen == 0)
-            return BSTR_ERR;
-    }
-    a->data[b->slen] = (unsigned char) '\0';
-    a->slen = b->slen;
-    return BSTR_OK;
-}
-
-/* int bassignmidstr (bstring a, const_bstring b, int left, int len)
- *
- * Overwrite the string a with the middle of contents of string b
- * starting from position left and running for a length len. left and
- * len are clamped to the ends of b as with the function bmidstr.
- *
- * Returns BSTR_ERR when a or b is invalid or a cannot be grown, BSTR_OK
- * otherwise. When the clamped length is not positive, a becomes the empty
- * string.
- */
-int bassignmidstr (bstring a, const_bstring b, int left, int len) {
-    if (b == NULL || b->data == NULL || b->slen < 0)
-        return BSTR_ERR;
-
-    /* A negative start shortens the range instead of moving it. */
-    if (left < 0) {
-        len += left;
-        left = 0;
-    }
-
-    if (len > b->slen - left) len = b->slen - left;
-
-    if (a == NULL || a->data == NULL || a->mlen < a->slen ||
-        a->slen < 0 || a->mlen == 0)
-        return BSTR_ERR;
-
-    if (len > 0) {
-        /* len + 1: balloc only guarantees a buffer of at least the
-           requested size, and the terminator below goes to index len. */
-        if (balloc (a, len + 1) != BSTR_OK) return BSTR_ERR;
-        bstr__memmove (a->data, b->data + left, len);
-        a->slen = len;
-    } else {
-        a->slen = 0;
-    }
-    a->data[a->slen] = (unsigned char) '\0';
-    return BSTR_OK;
-}
-
-/* int bassigncstr (bstring a, const char * str)
- *
- * Overwrite the string a with the contents of char * string str. Note that
- * the bstring a must be a well defined and writable bstring. If an error
- * occurs BSTR_ERR is returned however a may be partially overwritten.
- *
- * The copy runs in two phases: str is first written directly into a's
- * existing buffer; only if the terminator is not reached within a->mlen
- * bytes is the buffer grown and the remainder copied.
- */
-int bassigncstr (bstring a, const char * str) {
-int i;
-size_t len;
- if (a == NULL || a->data == NULL || a->mlen < a->slen ||
- a->slen < 0 || a->mlen == 0 || NULL == str)
- return BSTR_ERR;
-
- for (i=0; i < a->mlen; i++) {
- if ('\0' == (a->data[i] = str[i])) { /* terminator copied: everything fit */
- a->slen = i;
- return BSTR_OK;
- }
- }
-
- a->slen = i; /* buffer filled completely; i == a->mlen here */
- len = strlen (str + i); /* length of the part that did not fit */
- if (len > INT_MAX || i + len + 1 > INT_MAX ||
- 0 > balloc (a, (int) (i + len + 1))) return BSTR_ERR;
- bBlockCopy (a->data + i, str + i, (size_t) len + 1); /* + 1 copies the terminator too */
- a->slen += (int) len;
- return BSTR_OK;
-}
-
-/* int bassignblk (bstring a, const void * s, int len)
- *
- * Replace the contents of the bstring a with the len bytes of the block s
- * and terminate the result. The bstring a must be a well defined and
- * writable bstring. BSTR_ERR is returned, with a left untouched, when a is
- * invalid, s is NULL, len + 1 is less than 1, or growing a fails.
- */
-int bassignblk (bstring a, const void * s, int len) {
-    int need;
-
-    if (a == NULL || a->data == NULL) return BSTR_ERR;
-    if (a->mlen < a->slen || a->slen < 0 || a->mlen == 0) return BSTR_ERR;
-    if (NULL == s) return BSTR_ERR;
-
-    /* payload plus terminator */
-    need = len + 1;
-    if (need < 1) return BSTR_ERR;
-
-    if (need > a->mlen) {
-        if (0 > balloc (a, need)) return BSTR_ERR;
-    }
-
-    bBlockCopy (a->data, s, (size_t) len);
-    a->data[len] = (unsigned char) '\0';
-    a->slen = len;
-    return BSTR_OK;
-}
-
-/* int btrunc (bstring b, int n)
- *
- * Cut the bstring b down to at most n characters; a string that is already
- * that short is left alone. Returns BSTR_ERR for a negative n or an invalid
- * b, BSTR_OK otherwise.
- */
-int btrunc (bstring b, int n) {
-    if (n < 0 || b == NULL || b->data == NULL || b->mlen < b->slen ||
-        b->slen < 0 || b->mlen <= 0) return BSTR_ERR;
-
-    if (n >= b->slen) return BSTR_OK;
-
-    b->slen = n;
-    b->data[n] = (unsigned char) '\0';
-    return BSTR_OK;
-}
-
-/* <ctype.h> wrappers. The argument is converted to unsigned char before
-   the library call; it is parenthesized so that the cast applies to the
-   whole argument expression and not only to its first operand. */
-#define upcase(c) (toupper ((unsigned char) (c)))
-#define downcase(c) (tolower ((unsigned char) (c)))
-#define wspace(c) (isspace ((unsigned char) (c)))
-
-/* int btoupper (bstring b)
- *
- * Convert contents of bstring to upper case, in place.
- * Returns BSTR_ERR when b is invalid, BSTR_OK otherwise.
- */
-int btoupper (bstring b) {
-    int i, len;
-
-    if (b == NULL || b->data == NULL || b->mlen < b->slen ||
-        b->slen < 0 || b->mlen <= 0) return BSTR_ERR;
-
-    for (i = 0, len = b->slen; i < len; i++) {
-        b->data[i] = (unsigned char) upcase (b->data[i]);
-    }
-    return BSTR_OK;
-}
-
-/* int btolower (bstring b)
- *
- * Lower-case every character of the bstring b in place.
- * Returns BSTR_ERR when b is invalid, BSTR_OK otherwise.
- */
-int btolower (bstring b) {
-    unsigned char * p;
-    int left;
-
-    if (b == NULL || b->data == NULL || b->mlen < b->slen ||
-        b->slen < 0 || b->mlen <= 0) return BSTR_ERR;
-
-    p = b->data;
-    left = b->slen;
-    while (left > 0) {
-        *p = (unsigned char) downcase (*p);
-        p++;
-        left--;
-    }
-    return BSTR_OK;
-}
-
-/* int bstricmp (const_bstring b0, const_bstring b1)
- *
- * Case-insensitive comparison of two strings. The result is the difference
- * of the lower-cased characters at the first position where the strings
- * differ, or 0 when they are equal. When one string is a prefix of the
- * other, the first extra character (lower-cased) decides the result,
- * negated if it belongs to b1; an extra character equal to '\0' counts as
- * UCHAR_MAX+1. SHRT_MIN is returned for invalid arguments.
- */
-int bstricmp (const_bstring b0, const_bstring b1) {
-    int pos, diff, common;
-
-    if (bdata (b0) == NULL || b0->slen < 0 ||
-        bdata (b1) == NULL || b1->slen < 0) return SHRT_MIN;
-
-    /* length of the part present in both strings */
-    common = (b0->slen > b1->slen) ? b1->slen : b0->slen;
-
-    /* same length and same storage: nothing to compare */
-    if (b0->slen == b1->slen && b0->data == b1->data) return BSTR_OK;
-
-    for (pos = 0; pos < common; pos++) {
-        diff = (char) downcase (b0->data[pos])
-             - (char) downcase (b1->data[pos]);
-        if (diff != 0) return diff;
-    }
-
-    if (b0->slen > common) {
-        diff = (char) downcase (b0->data[common]);
-        return diff ? diff : UCHAR_MAX + 1;
-    }
-    if (b1->slen > common) {
-        diff = - (char) downcase (b1->data[common]);
-        return diff ? diff : - (int) (UCHAR_MAX + 1);
-    }
-    return BSTR_OK;
-}
-
-/* int bstrnicmp (const_bstring b0, const_bstring b1, int n)
- *
- * Compare two strings without differentiating between case for at most n
- * characters. If the position where the two strings first differ is
- * before the nth position, the return value is the difference of the
- * lower-cased characters at that position (the same convention bstricmp
- * uses), otherwise 0 is returned. If the lengths are different and less
- * than n characters, then a difference from 0 is given, but if the first
- * extra character is '\0', then it is taken to be the value UCHAR_MAX+1.
- * SHRT_MIN is returned for invalid arguments or a negative n.
- */
-int bstrnicmp (const_bstring b0, const_bstring b1, int n) {
-    int i, v, m;
-
-    if (bdata (b0) == NULL || b0->slen < 0 ||
-        bdata (b1) == NULL || b1->slen < 0 || n < 0) return SHRT_MIN;
-
-    /* m = number of positions that exist in both strings, capped at n */
-    m = n;
-    if (m > b0->slen) m = b0->slen;
-    if (m > b1->slen) m = b1->slen;
-
-    if (b0->data != b1->data) {
-        for (i = 0; i < m; i ++) {
-            v = (char) downcase (b0->data[i]);
-            v -= (char) downcase (b1->data[i]);
-            /* Return the folded difference: the raw byte difference can
-               carry the opposite sign (e.g. 'a' against 'B'). */
-            if (v != 0) return v;
-        }
-    }
-
-    if (n == m || b0->slen == b1->slen) return BSTR_OK;
-
-    if (b0->slen > m) {
-        v = (char) downcase (b0->data[m]);
-        if (v) return v;
-        return UCHAR_MAX + 1;
-    }
-
-    v = - (char) downcase (b1->data[m]);
-    if (v) return v;
-    return - (int) (UCHAR_MAX + 1);
-}
-
-/* int biseqcaseless (const_bstring b0, const_bstring b1)
- *
- * Case-insensitive equality test. Returns 1 when the strings match apart
- * from case, 0 when they differ (BSTR_OK is returned directly when only
- * the lengths differ, without looking at the data) and BSTR_ERR for
- * invalid arguments. '\0' characters get no special treatment.
- */
-int biseqcaseless (const_bstring b0, const_bstring b1) {
-    int pos;
-
-    if (bdata (b0) == NULL || b0->slen < 0 ||
-        bdata (b1) == NULL || b1->slen < 0) return BSTR_ERR;
-    if (b0->slen != b1->slen) return BSTR_OK;
-    if (b0->data == b1->data || b0->slen == 0) return 1;
-
-    for (pos = 0; pos < b0->slen; pos++) {
-        /* only fold case when the raw bytes disagree */
-        if (b0->data[pos] == b1->data[pos]) continue;
-        if ((unsigned char) downcase (b0->data[pos]) !=
-            (unsigned char) downcase (b1->data[pos])) return 0;
-    }
-    return 1;
-}
-
-/* int bisstemeqcaselessblk (const_bstring b0, const void * blk, int len)
- *
- * Test whether b0 starts with the len bytes at blk, ignoring case. Returns
- * 1 on a match, 0 when the bytes differ other than in case, BSTR_OK when
- * b0 is shorter than len, and BSTR_ERR for invalid arguments. '\0'
- * characters get no special treatment.
- */
-int bisstemeqcaselessblk (const_bstring b0, const void * blk, int len) {
-    const unsigned char * mem;
-    int pos;
-
-    if (bdata (b0) == NULL || b0->slen < 0 || NULL == blk || len < 0)
-        return BSTR_ERR;
-    if (b0->slen < len) return BSTR_OK;
-
-    mem = (const unsigned char *) blk;
-    if (b0->data == mem || len == 0) return 1;
-
-    for (pos = 0; pos < len; pos++) {
-        if (b0->data[pos] == mem[pos]) continue;
-        if (downcase (b0->data[pos]) != downcase (mem[pos])) return 0;
-    }
-    return 1;
-}
-
-/*
- * int bltrimws (bstring b)
- *
- * Remove the run of whitespace at the left end of the string. A string
- * made up only of whitespace (or an empty one) becomes the empty string.
- * Returns BSTR_ERR for an invalid b; otherwise BSTR_OK or, when leading
- * characters are removed through bdelete, that call's result.
- */
-int bltrimws (bstring b) {
-    int lead;
-
-    if (b == NULL || b->data == NULL || b->mlen < b->slen ||
-        b->slen < 0 || b->mlen <= 0) return BSTR_ERR;
-
-    /* count the leading whitespace characters */
-    lead = 0;
-    while (lead < b->slen && wspace (b->data[lead])) {
-        lead++;
-    }
-
-    if (lead < b->slen) {
-        return bdelete (b, 0, lead);
-    }
-
-    b->data[0] = (unsigned char) '\0';
-    b->slen = 0;
-    return BSTR_OK;
-}
-
-/*
- * int brtrimws (bstring b)
- *
- * Delete whitespace contiguous from the right end of the string. A string
- * consisting only of whitespace (or an empty one) becomes the empty
- * string. Returns BSTR_ERR when b is invalid, BSTR_OK otherwise.
- */
-int brtrimws (bstring b) {
-    int i;
-
-    if (b == NULL || b->data == NULL || b->mlen < b->slen ||
-        b->slen < 0 || b->mlen <= 0) return BSTR_ERR;
-
-    for (i = b->slen - 1; i >= 0; i--) {
-        if (!wspace (b->data[i])) {
-            /* Index i + 1 must lie inside the buffer before it is written;
-               with mlen == slen and nothing trimmed it would be data[mlen]. */
-            if (b->mlen > i + 1) b->data[i+1] = (unsigned char) '\0';
-            b->slen = i + 1;
-            return BSTR_OK;
-        }
-    }
-
-    b->data[0] = (unsigned char) '\0';
-    b->slen = 0;
-    return BSTR_OK;
-}
-
-/*
- * int btrimws (bstring b)
- *
- * Delete whitespace contiguous from both ends of the string. A string
- * consisting only of whitespace (or an empty one) becomes the empty
- * string. Returns BSTR_ERR when b is invalid; otherwise BSTR_OK or, when a
- * non-whitespace character exists, the result of the bdelete call that
- * removes the leading whitespace.
- */
-int btrimws (bstring b) {
-    int i, j;
-
-    if (b == NULL || b->data == NULL || b->mlen < b->slen ||
-        b->slen < 0 || b->mlen <= 0) return BSTR_ERR;
-
-    for (i = b->slen - 1; i >= 0; i--) {
-        if (!wspace (b->data[i])) {
-            /* Index i + 1 must lie inside the buffer before it is written;
-               with mlen == slen and nothing trimmed it would be data[mlen]. */
-            if (b->mlen > i + 1) b->data[i+1] = (unsigned char) '\0';
-            b->slen = i + 1;
-            /* data[i] is not whitespace, so this scan stops at or before i */
-            for (j = 0; wspace (b->data[j]); j++) {}
-            return bdelete (b, 0, j);
-        }
-    }
-
-    b->data[0] = (unsigned char) '\0';
-    b->slen = 0;
-    return BSTR_OK;
-}
-
-/* int biseq (const_bstring b0, const_bstring b1)
- *
- * Equality test for two bstrings. Returns 1 when they hold the same bytes,
- * 0 when the contents differ, BSTR_OK straight away when the lengths
- * differ, and BSTR_ERR for invalid arguments. '\0' characters get no
- * special treatment.
- */
-int biseq (const_bstring b0, const_bstring b1) {
-    if (b0 == NULL || b1 == NULL) return BSTR_ERR;
-    if (b0->data == NULL || b1->data == NULL) return BSTR_ERR;
-    if (b0->slen < 0 || b1->slen < 0) return BSTR_ERR;
-
-    if (b0->slen != b1->slen) return BSTR_OK;
-
-    /* shared storage or two empty strings need no byte comparison */
-    if (b0->data == b1->data || b0->slen == 0) return 1;
-
-    return !bstr__memcmp (b0->data, b1->data, b0->slen);
-}
-
-/* int bisstemeqblk (const_bstring b0, const void * blk, int len)
- *
- * Test whether b0 starts with the len bytes at blk. Returns 1 on a match,
- * BSTR_OK when a byte differs or b0 is shorter than len, and BSTR_ERR for
- * invalid arguments. '\0' characters get no special treatment.
- */
-int bisstemeqblk (const_bstring b0, const void * blk, int len) {
-    const unsigned char * mem;
-    int pos;
-
-    if (bdata (b0) == NULL || b0->slen < 0 || NULL == blk || len < 0)
-        return BSTR_ERR;
-    if (b0->slen < len) return BSTR_OK;
-
-    mem = (const unsigned char *) blk;
-    if (b0->data == mem || len == 0) return 1;
-
-    pos = 0;
-    while (pos < len) {
-        if (b0->data[pos] != mem[pos]) return BSTR_OK;
-        pos++;
-    }
-    return 1;
-}
-
-/* int biseqcstr (const_bstring b, const char *s)
- *
- * Compare the bstring b with the C string s. They are equal only when s is
- * '\0' terminated at exactly the length of b and every byte before that
- * matches b, which means b may not contain '\0' itself. Returns 1 when
- * equal, BSTR_OK when a mismatch or an early terminator is found, and
- * BSTR_ERR for a detectable error.
- */
-int biseqcstr (const_bstring b, const char * s) {
-    int pos;
-
-    if (b == NULL || s == NULL || b->data == NULL || b->slen < 0) return BSTR_ERR;
-
-    for (pos = 0; pos < b->slen; pos++) {
-        /* s ended before b did */
-        if (s[pos] == '\0') return BSTR_OK;
-        if (b->data[pos] != (unsigned char) s[pos]) return BSTR_OK;
-    }
-
-    /* equal only if s ends exactly here */
-    return s[pos] == '\0';
-}
-
-/* int biseqcstrcaseless (const_bstring b, const char *s)
- *
- * Compare the bstring b with the C string s, ignoring case. They are equal
- * only when s is '\0' terminated at exactly the length of b and every byte
- * before that matches b apart from case, which means b may not contain
- * '\0' itself. Returns 1 when equal, BSTR_OK when a mismatch or an early
- * terminator is found, and BSTR_ERR for a detectable error.
- */
-int biseqcstrcaseless (const_bstring b, const char * s) {
-    int pos;
-
-    if (b == NULL || s == NULL || b->data == NULL || b->slen < 0) return BSTR_ERR;
-
-    for (pos = 0; pos < b->slen; pos++) {
-        /* s ended before b did */
-        if (s[pos] == '\0') return BSTR_OK;
-        /* identical bytes need no case folding */
-        if (b->data[pos] == (unsigned char) s[pos]) continue;
-        if (downcase (b->data[pos]) != (unsigned char) downcase (s[pos]))
-            return BSTR_OK;
-    }
-
-    return s[pos] == '\0';
-}
-
-/* int bstrcmp (const_bstring b0, const_bstring b1)
- *
- * Compare the strings b0 and b1. SHRT_MIN is returned for invalid
- * arguments. Otherwise the result is the difference of the first pair of
- * differing characters; if the shorter string is a prefix of the longer
- * one, 1 is returned when b0 is the longer and -1 when b1 is; BSTR_OK is
- * returned when no difference is found. Like the standard C library
- * strcmp, the comparison does not proceed past a '\0' character found in
- * the compared part.
- */
-int bstrcmp (const_bstring b0, const_bstring b1) {
-    int pos, common;
-
-    if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL ||
-        b0->slen < 0 || b1->slen < 0) return SHRT_MIN;
-
-    common = (b0->slen > b1->slen) ? b1->slen : b0->slen;
-
-    /* same length with shared storage, or both empty */
-    if (b0->slen == b1->slen && (b0->data == b1->data || b0->slen == 0))
-        return BSTR_OK;
-
-    pos = 0;
-    while (pos < common) {
-        int delta = ((char) b0->data[pos]) - ((char) b1->data[pos]);
-        if (delta != 0) return delta;
-        if (b0->data[pos] == (unsigned char) '\0') return BSTR_OK;
-        pos++;
-    }
-
-    if (b0->slen > common) return 1;
-    if (b1->slen > common) return -1;
-    return BSTR_OK;
-}
-
-/* int bstrncmp (const_bstring b0, const_bstring b1, int n)
- *
- * Compare the string b0 and b1 for at most n characters. If there is an
- * error, SHRT_MIN is returned, otherwise a value is returned as if b0 and
- * b1 were first truncated to at most n characters then bstrcmp was called
- * with these new strings are paremeters. If the length of the strings are
- * different, this function is O(n). Like its standard C library counter
- * part strcmp, the comparison does not proceed past any '\0' termination
- * characters encountered.
- */
-int bstrncmp (const_bstring b0, const_bstring b1, int n) {
-int i, v, m;
-
- if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL ||
- b0->slen < 0 || b1->slen < 0) return SHRT_MIN;
- m = n;
- if (m > b0->slen) m = b0->slen;
- if (m > b1->slen) m = b1->slen;
-
- if (b0->data != b1->data) {
- for (i = 0; i < m; i ++) {
- v = ((char) b0->data[i]) - ((char) b1->data[i]);
- if (v != 0) return v;
- if (b0->data[i] == (unsigned char) '\0') return BSTR_OK;
- }
- }
-
- if (n == m || b0->slen == b1->slen) return BSTR_OK;
-
- if (b0->slen > m) return 1;
- return -1;
-}
-
-/* bstring bmidstr (const_bstring b, int left, int len)
- *
- * Create a bstring which is the substring of b starting from position left
- * and running for a length len (clamped by the end of the bstring b.) If
- * b is detectably invalid, then NULL is returned. The section described
- * by (left, len) is clamped to the boundaries of b.
- */
-bstring bmidstr (const_bstring b, int left, int len) {
-
- if (b == NULL || b->slen < 0 || b->data == NULL) return NULL;
-
- if (left < 0) {
- len += left;
- left = 0;
- }
-
- if (len > b->slen - left) len = b->slen - left;
-
- if (len <= 0) return bfromcstr ("");
- return blk2bstr (b->data + left, len);
-}
-
-/* int bdelete (bstring b, int pos, int len)
- *
- * Removes characters from pos to pos+len-1 inclusive and shifts the tail of
- * the bstring starting from pos+len to pos. len must be positive for this
- * call to have any effect. The section of the string described by (pos,
- * len) is clamped to boundaries of the bstring b.
- */
-int bdelete (bstring b, int pos, int len) {
- /* Clamp to left side of bstring */
- if (pos < 0) {
- len += pos;
- pos = 0;
- }
-
- if (len < 0 || b == NULL || b->data == NULL || b->slen < 0 ||
- b->mlen < b->slen || b->mlen <= 0)
- return BSTR_ERR;
- if (len > 0 && pos < b->slen) {
- if (pos + len >= b->slen) {
- b->slen = pos;
- } else {
- bBlockCopy ((char *) (b->data + pos),
- (char *) (b->data + pos + len),
- b->slen - (pos+len));
- b->slen -= len;
- }
- b->data[b->slen] = (unsigned char) '\0';
- }
- return BSTR_OK;
-}
-
-/* int bdestroy (bstring b)
- *
- * Free up the bstring. Note that if b is detectably invalid or not writable
- * then no action is performed and BSTR_ERR is returned. Like a freed memory
- * allocation, dereferences, writes or any other action on b after it has
- * been bdestroyed is undefined.
- */
-int bdestroy (bstring b) {
- if (b == NULL || b->slen < 0 || b->mlen <= 0 || b->mlen < b->slen ||
- b->data == NULL)
- return BSTR_ERR;
-
- bstr__free (b->data);
-
- /* In case there is any stale usage, there is one more chance to
- notice this error. */
-
- b->slen = -1;
- b->mlen = -__LINE__;
- b->data = NULL;
-
- bstr__free (b);
- return BSTR_OK;
-}
-
-/* int binstr (const_bstring b1, int pos, const_bstring b2)
- *
- * Search for the bstring b2 in b1 starting from position pos, and searching
- * forward. If it is found then return with the first position where it is
- * found, otherwise return BSTR_ERR. Note that this is just a brute force
- * string searcher that does not attempt clever things like the Boyer-Moore
- * search algorithm. Because of this there are many degenerate cases where
- * this can take much longer than it needs to.
- */
-int binstr (const_bstring b1, int pos, const_bstring b2) {
-int j, ii, ll, lf;
-unsigned char * d0;
-unsigned char c0;
-register unsigned char * d1;
-register unsigned char c1;
-register int i;
-
- if (b1 == NULL || b1->data == NULL || b1->slen < 0 ||
- b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR;
- if (b1->slen == pos) return (b2->slen == 0)?pos:BSTR_ERR;
- if (b1->slen < pos || pos < 0) return BSTR_ERR;
- if (b2->slen == 0) return pos;
-
- /* No space to find such a string? */
- if ((lf = b1->slen - b2->slen + 1) <= pos) return BSTR_ERR;
-
- /* An obvious alias case */
- if (b1->data == b2->data && pos == 0) return 0;
-
- i = pos;
-
- d0 = b2->data;
- d1 = b1->data;
- ll = b2->slen;
-
- /* Peel off the b2->slen == 1 case */
- c0 = d0[0];
- if (1 == ll) {
- for (;i < lf; i++) if (c0 == d1[i]) return i;
- return BSTR_ERR;
- }
-
- c1 = c0;
- j = 0;
- lf = b1->slen - 1;
-
- ii = -1;
- if (i < lf) do {
- /* Unrolled current character test */
- if (c1 != d1[i]) {
- if (c1 != d1[1+i]) {
- i += 2;
- continue;
- }
- i++;
- }
-
- /* Take note if this is the start of a potential match */
- if (0 == j) ii = i;
-
- /* Shift the test character down by one */
- j++;
- i++;
-
- /* If this isn't past the last character continue */
- if (j < ll) {
- c1 = d0[j];
- continue;
- }
-
- N0:;
-
- /* If no characters mismatched, then we matched */
- if (i == ii+j) return ii;
-
- /* Shift back to the beginning */
- i -= j;
- j = 0;
- c1 = c0;
- } while (i < lf);
-
- /* Deal with last case if unrolling caused a misalignment */
- if (i == lf && ll == j+1 && c1 == d1[i]) goto N0;
-
- return BSTR_ERR;
-}
-
-/* int binstrr (const_bstring b1, int pos, const_bstring b2)
- *
- * Search for the bstring b2 in b1 starting from position pos, and searching
- * backward. If it is found then return with the first position where it is
- * found, otherwise return BSTR_ERR. Note that this is just a brute force
- * string searcher that does not attempt clever things like the Boyer-Moore
- * search algorithm. Because of this there are many degenerate cases where
- * this can take much longer than it needs to.
- */
-int binstrr (const_bstring b1, int pos, const_bstring b2) {
-int j, i, l;
-unsigned char * d0, * d1;
-
- if (b1 == NULL || b1->data == NULL || b1->slen < 0 ||
- b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR;
- if (b1->slen == pos && b2->slen == 0) return pos;
- if (b1->slen < pos || pos < 0) return BSTR_ERR;
- if (b2->slen == 0) return pos;
-
- /* Obvious alias case */
- if (b1->data == b2->data && pos == 0 && b2->slen <= b1->slen) return 0;
-
- i = pos;
- if ((l = b1->slen - b2->slen) < 0) return BSTR_ERR;
-
- /* If no space to find such a string then snap back */
- if (l + 1 <= i) i = l;
- j = 0;
-
- d0 = b2->data;
- d1 = b1->data;
- l = b2->slen;
-
- for (;;) {
- if (d0[j] == d1[i + j]) {
- j ++;
- if (j >= l) return i;
- } else {
- i --;
- if (i < 0) break;
- j=0;
- }
- }
-
- return BSTR_ERR;
-}
-
-/* int binstrcaseless (const_bstring b1, int pos, const_bstring b2)
- *
- * Search for the bstring b2 in b1 starting from position pos, and searching
- * forward but without regard to case. If it is found then return with the
- * first position where it is found, otherwise return BSTR_ERR. Note that
- * this is just a brute force string searcher that does not attempt clever
- * things like the Boyer-Moore search algorithm. Because of this there are
- * many degenerate cases where this can take much longer than it needs to.
- */
-int binstrcaseless (const_bstring b1, int pos, const_bstring b2) {
-int j, i, l, ll;
-unsigned char * d0, * d1;
-
- if (b1 == NULL || b1->data == NULL || b1->slen < 0 ||
- b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR;
- if (b1->slen == pos) return (b2->slen == 0)?pos:BSTR_ERR;
- if (b1->slen < pos || pos < 0) return BSTR_ERR;
- if (b2->slen == 0) return pos;
-
- l = b1->slen - b2->slen + 1;
-
- /* No space to find such a string? */
- if (l <= pos) return BSTR_ERR;
-
- /* An obvious alias case */
- if (b1->data == b2->data && pos == 0) return BSTR_OK;
-
- i = pos;
- j = 0;
-
- d0 = b2->data;
- d1 = b1->data;
- ll = b2->slen;
-
- for (;;) {
- if (d0[j] == d1[i + j] || downcase (d0[j]) == downcase (d1[i + j])) {
- j ++;
- if (j >= ll) return i;
- } else {
- i ++;
- if (i >= l) break;
- j=0;
- }
- }
-
- return BSTR_ERR;
-}
-
-/* int binstrrcaseless (const_bstring b1, int pos, const_bstring b2)
- *
- * Search for the bstring b2 in b1 starting from position pos, and searching
- * backward but without regard to case. If it is found then return with the
- * first position where it is found, otherwise return BSTR_ERR. Note that
- * this is just a brute force string searcher that does not attempt clever
- * things like the Boyer-Moore search algorithm. Because of this there are
- * many degenerate cases where this can take much longer than it needs to.
- */
-int binstrrcaseless (const_bstring b1, int pos, const_bstring b2) {
-int j, i, l;
-unsigned char * d0, * d1;
-
- if (b1 == NULL || b1->data == NULL || b1->slen < 0 ||
- b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR;
- if (b1->slen == pos && b2->slen == 0) return pos;
- if (b1->slen < pos || pos < 0) return BSTR_ERR;
- if (b2->slen == 0) return pos;
-
- /* Obvious alias case */
- if (b1->data == b2->data && pos == 0 && b2->slen <= b1->slen) return BSTR_OK;
-
- i = pos;
- if ((l = b1->slen - b2->slen) < 0) return BSTR_ERR;
-
- /* If no space to find such a string then snap back */
- if (l + 1 <= i) i = l;
- j = 0;
-
- d0 = b2->data;
- d1 = b1->data;
- l = b2->slen;
-
- for (;;) {
- if (d0[j] == d1[i + j] || downcase (d0[j]) == downcase (d1[i + j])) {
- j ++;
- if (j >= l) return i;
- } else {
- i --;
- if (i < 0) break;
- j=0;
- }
- }
-
- return BSTR_ERR;
-}
-
-
-/* int bstrchrp (const_bstring b, int c, int pos)
- *
- * Search for the character c in b forwards from the position pos
- * (inclusive).
- */
-int bstrchrp (const_bstring b, int c, int pos) {
-unsigned char * p;
-
- if (b == NULL || b->data == NULL || b->slen <= pos || pos < 0) return BSTR_ERR;
- p = (unsigned char *) bstr__memchr ((b->data + pos), (unsigned char) c, (b->slen - pos));
- if (p) return (int) (p - b->data);
- return BSTR_ERR;
-}
-
-/* int bstrrchrp (const_bstring b, int c, int pos)
- *
- * Search for the character c in b backwards from the position pos in string
- * (inclusive).
- */
-int bstrrchrp (const_bstring b, int c, int pos) {
-int i;
-
- if (b == NULL || b->data == NULL || b->slen <= pos || pos < 0) return BSTR_ERR;
- for (i=pos; i >= 0; i--) {
- if (b->data[i] == (unsigned char) c) return i;
- }
- return BSTR_ERR;
-}
-
-#if !defined (BSTRLIB_AGGRESSIVE_MEMORY_FOR_SPEED_TRADEOFF)
-#define LONG_LOG_BITS_QTY (3)
-#define LONG_BITS_QTY (1 << LONG_LOG_BITS_QTY)
-#define LONG_TYPE unsigned char
-
-#define CFCLEN ((1 << CHAR_BIT) / LONG_BITS_QTY)
-struct charField { LONG_TYPE content[CFCLEN]; };
-#define testInCharField(cf,c) ((cf)->content[(c) >> LONG_LOG_BITS_QTY] & (((long)1) << ((c) & (LONG_BITS_QTY-1))))
-#define setInCharField(cf,idx) { \
- unsigned int c = (unsigned int) (idx); \
- (cf)->content[c >> LONG_LOG_BITS_QTY] |= (LONG_TYPE) (1ul << (c & (LONG_BITS_QTY-1))); \
-}
-
-#else
-
-#define CFCLEN (1 << CHAR_BIT)
-struct charField { unsigned char content[CFCLEN]; };
-#define testInCharField(cf,c) ((cf)->content[(unsigned char) (c)])
-#define setInCharField(cf,idx) (cf)->content[(unsigned int) (idx)] = ~0
-
-#endif
-
-/* Convert a bstring to charField */
-static int buildCharField (struct charField * cf, const_bstring b) {
-int i;
- if (b == NULL || b->data == NULL || b->slen <= 0) return BSTR_ERR;
- memset ((void *) cf->content, 0, sizeof (struct charField));
- for (i=0; i < b->slen; i++) {
- setInCharField (cf, b->data[i]);
- }
- return BSTR_OK;
-}
-
-static void invertCharField (struct charField * cf) {
-int i;
- for (i=0; i < CFCLEN; i++) cf->content[i] = ~cf->content[i];
-}
-
-/* Inner engine for binchr */
-static int binchrCF (const unsigned char * data, int len, int pos, const struct charField * cf) {
-int i;
- for (i=pos; i < len; i++) {
- unsigned char c = (unsigned char) data[i];
- if (testInCharField (cf, c)) return i;
- }
- return BSTR_ERR;
-}
-
-/* int binchr (const_bstring b0, int pos, const_bstring b1);
- *
- * Search for the first position in b0 starting from pos or after, in which
- * one of the characters in b1 is found and return it. If such a position
- * does not exist in b0, then BSTR_ERR is returned.
- */
-int binchr (const_bstring b0, int pos, const_bstring b1) {
-struct charField chrs;
- if (pos < 0 || b0 == NULL || b0->data == NULL ||
- b0->slen <= pos) return BSTR_ERR;
- if (1 == b1->slen) return bstrchrp (b0, b1->data[0], pos);
- if (0 > buildCharField (&chrs, b1)) return BSTR_ERR;
- return binchrCF (b0->data, b0->slen, pos, &chrs);
-}
-
-/* Inner engine for binchrr */
-static int binchrrCF (const unsigned char * data, int pos, const struct charField * cf) {
-int i;
- for (i=pos; i >= 0; i--) {
- unsigned int c = (unsigned int) data[i];
- if (testInCharField (cf, c)) return i;
- }
- return BSTR_ERR;
-}
-
-/* int binchrr (const_bstring b0, int pos, const_bstring b1);
- *
- * Search for the last position in b0 no greater than pos, in which one of
- * the characters in b1 is found and return it. If such a position does not
- * exist in b0, then BSTR_ERR is returned.
- */
-int binchrr (const_bstring b0, int pos, const_bstring b1) {
-struct charField chrs;
- if (pos < 0 || b0 == NULL || b0->data == NULL || b1 == NULL ||
- b0->slen < pos) return BSTR_ERR;
- if (pos == b0->slen) pos--;
- if (1 == b1->slen) return bstrrchrp (b0, b1->data[0], pos);
- if (0 > buildCharField (&chrs, b1)) return BSTR_ERR;
- return binchrrCF (b0->data, pos, &chrs);
-}
-
-/* int bninchr (const_bstring b0, int pos, const_bstring b1);
- *
- * Search for the first position in b0 starting from pos or after, in which
- * none of the characters in b1 is found and return it. If such a position
- * does not exist in b0, then BSTR_ERR is returned.
- */
-int bninchr (const_bstring b0, int pos, const_bstring b1) {
-struct charField chrs;
- if (pos < 0 || b0 == NULL || b0->data == NULL ||
- b0->slen <= pos) return BSTR_ERR;
- if (buildCharField (&chrs, b1) < 0) return BSTR_ERR;
- invertCharField (&chrs);
- return binchrCF (b0->data, b0->slen, pos, &chrs);
-}
-
-/* int bninchrr (const_bstring b0, int pos, const_bstring b1);
- *
- * Search for the last position in b0 no greater than pos, in which none of
- * the characters in b1 is found and return it. If such a position does not
- * exist in b0, then BSTR_ERR is returned.
- */
-int bninchrr (const_bstring b0, int pos, const_bstring b1) {
-struct charField chrs;
- if (pos < 0 || b0 == NULL || b0->data == NULL ||
- b0->slen < pos) return BSTR_ERR;
- if (pos == b0->slen) pos--;
- if (buildCharField (&chrs, b1) < 0) return BSTR_ERR;
- invertCharField (&chrs);
- return binchrrCF (b0->data, pos, &chrs);
-}
-
-/* int bsetstr (bstring b0, int pos, bstring b1, unsigned char fill)
- *
- * Overwrite the string b0 starting at position pos with the string b1. If
- * the position pos is past the end of b0, then the character "fill" is
- * appended as necessary to make up the gap between the end of b0 and pos.
- * If b1 is NULL, it behaves as if it were a 0-length string.
- */
-int bsetstr (bstring b0, int pos, const_bstring b1, unsigned char fill) {
-int d, newlen;
-ptrdiff_t pd;
-bstring aux = (bstring) b1;
-
- if (pos < 0 || b0 == NULL || b0->slen < 0 || NULL == b0->data ||
- b0->mlen < b0->slen || b0->mlen <= 0) return BSTR_ERR;
- if (b1 != NULL && (b1->slen < 0 || b1->data == NULL)) return BSTR_ERR;
-
- d = pos;
-
- /* Aliasing case */
- if (NULL != aux) {
- if ((pd = (ptrdiff_t) (b1->data - b0->data)) >= 0 && pd < (ptrdiff_t) b0->mlen) {
- if (NULL == (aux = bstrcpy (b1))) return BSTR_ERR;
- }
- d += aux->slen;
- }
-
- /* Increase memory size if necessary */
- if (balloc (b0, d + 1) != BSTR_OK) {
- if (aux != b1) bdestroy (aux);
- return BSTR_ERR;
- }
-
- newlen = b0->slen;
-
- /* Fill in "fill" character as necessary */
- if (pos > newlen) {
- bstr__memset (b0->data + b0->slen, (int) fill, (size_t) (pos - b0->slen));
- newlen = pos;
- }
-
- /* Copy b1 to position pos in b0. */
- if (aux != NULL) {
- bBlockCopy ((char *) (b0->data + pos), (char *) aux->data, aux->slen);
- if (aux != b1) bdestroy (aux);
- }
-
- /* Indicate the potentially increased size of b0 */
- if (d > newlen) newlen = d;
-
- b0->slen = newlen;
- b0->data[newlen] = (unsigned char) '\0';
-
- return BSTR_OK;
-}
-
-/* int binsert (bstring b1, int pos, bstring b2, unsigned char fill)
- *
- * Inserts the string b2 into b1 at position pos. If the position pos is
- * past the end of b1, then the character "fill" is appended as necessary to
- * make up the gap between the end of b1 and pos. Unlike bsetstr, binsert
- * does not allow b2 to be NULL.
- */
-int binsert (bstring b1, int pos, const_bstring b2, unsigned char fill) {
-int d, l;
-ptrdiff_t pd;
-bstring aux = (bstring) b2;
-
- if (pos < 0 || b1 == NULL || b2 == NULL || b1->slen < 0 ||
- b2->slen < 0 || b1->mlen < b1->slen || b1->mlen <= 0) return BSTR_ERR;
-
- /* Aliasing case */
- if ((pd = (ptrdiff_t) (b2->data - b1->data)) >= 0 && pd < (ptrdiff_t) b1->mlen) {
- if (NULL == (aux = bstrcpy (b2))) return BSTR_ERR;
- }
-
- /* Compute the two possible end pointers */
- d = b1->slen + aux->slen;
- l = pos + aux->slen;
- if ((d|l) < 0) return BSTR_ERR;
-
- if (l > d) {
- /* Inserting past the end of the string */
- if (balloc (b1, l + 1) != BSTR_OK) {
- if (aux != b2) bdestroy (aux);
- return BSTR_ERR;
- }
- bstr__memset (b1->data + b1->slen, (int) fill, (size_t) (pos - b1->slen));
- b1->slen = l;
- } else {
- /* Inserting in the middle of the string */
- if (balloc (b1, d + 1) != BSTR_OK) {
- if (aux != b2) bdestroy (aux);
- return BSTR_ERR;
- }
- bBlockCopy (b1->data + l, b1->data + pos, d - l);
- b1->slen = d;
- }
- bBlockCopy (b1->data + pos, aux->data, aux->slen);
- b1->data[b1->slen] = (unsigned char) '\0';
- if (aux != b2) bdestroy (aux);
- return BSTR_OK;
-}
-
-/* int breplace (bstring b1, int pos, int len, bstring b2,
- * unsigned char fill)
- *
- * Replace a section of a string from pos for a length len with the string b2.
- * fill is used is pos > b1->slen.
- */
-int breplace (bstring b1, int pos, int len, const_bstring b2,
- unsigned char fill) {
-int pl, ret;
-ptrdiff_t pd;
-bstring aux = (bstring) b2;
-
- if (pos < 0 || len < 0 || (pl = pos + len) < 0 || b1 == NULL ||
- b2 == NULL || b1->data == NULL || b2->data == NULL ||
- b1->slen < 0 || b2->slen < 0 || b1->mlen < b1->slen ||
- b1->mlen <= 0) return BSTR_ERR;
-
- /* Straddles the end? */
- if (pl >= b1->slen) {
- if ((ret = bsetstr (b1, pos, b2, fill)) < 0) return ret;
- if (pos + b2->slen < b1->slen) {
- b1->slen = pos + b2->slen;
- b1->data[b1->slen] = (unsigned char) '\0';
- }
- return ret;
- }
-
- /* Aliasing case */
- if ((pd = (ptrdiff_t) (b2->data - b1->data)) >= 0 && pd < (ptrdiff_t) b1->slen) {
- if (NULL == (aux = bstrcpy (b2))) return BSTR_ERR;
- }
-
- if (aux->slen > len) {
- if (balloc (b1, b1->slen + aux->slen - len) != BSTR_OK) {
- if (aux != b2) bdestroy (aux);
- return BSTR_ERR;
- }
- }
-
- if (aux->slen != len) bstr__memmove (b1->data + pos + aux->slen, b1->data + pos + len, b1->slen - (pos + len));
- bstr__memcpy (b1->data + pos, aux->data, aux->slen);
- b1->slen += aux->slen - len;
- b1->data[b1->slen] = (unsigned char) '\0';
- if (aux != b2) bdestroy (aux);
- return BSTR_OK;
-}
-
-/*
- * findreplaceengine is used to implement bfindreplace and
- * bfindreplacecaseless. It works by breaking the three cases of
- * expansion, reduction and replacement, and solving each of these
- * in the most efficient way possible.
- */
-
-typedef int (*instr_fnptr) (const_bstring s1, int pos, const_bstring s2);
-
-#define INITIAL_STATIC_FIND_INDEX_COUNT 32
-
-static int findreplaceengine (bstring b, const_bstring find, const_bstring repl, int pos, instr_fnptr instr) {
-int i, ret, slen, mlen, delta, acc;
-int * d;
-int static_d[INITIAL_STATIC_FIND_INDEX_COUNT+1]; /* This +1 is unnecessary, but it shuts up LINT. */
-ptrdiff_t pd;
-bstring auxf = (bstring) find;
-bstring auxr = (bstring) repl;
-
- if (b == NULL || b->data == NULL || find == NULL ||
- find->data == NULL || repl == NULL || repl->data == NULL ||
- pos < 0 || find->slen <= 0 || b->mlen < 0 || b->slen > b->mlen ||
- b->mlen <= 0 || b->slen < 0 || repl->slen < 0) return BSTR_ERR;
- if (pos > b->slen - find->slen) return BSTR_OK;
-
- /* Alias with find string */
- pd = (ptrdiff_t) (find->data - b->data);
- if ((ptrdiff_t) (pos - find->slen) < pd && pd < (ptrdiff_t) b->slen) {
- if (NULL == (auxf = bstrcpy (find))) return BSTR_ERR;
- }
-
- /* Alias with repl string */
- pd = (ptrdiff_t) (repl->data - b->data);
- if ((ptrdiff_t) (pos - repl->slen) < pd && pd < (ptrdiff_t) b->slen) {
- if (NULL == (auxr = bstrcpy (repl))) {
- if (auxf != find) bdestroy (auxf);
- return BSTR_ERR;
- }
- }
-
- delta = auxf->slen - auxr->slen;
-
- /* in-place replacement since find and replace strings are of equal
- length */
- if (delta == 0) {
- while ((pos = instr (b, pos, auxf)) >= 0) {
- bstr__memcpy (b->data + pos, auxr->data, auxr->slen);
- pos += auxf->slen;
- }
- if (auxf != find) bdestroy (auxf);
- if (auxr != repl) bdestroy (auxr);
- return BSTR_OK;
- }
-
- /* shrinking replacement since auxf->slen > auxr->slen */
- if (delta > 0) {
- acc = 0;
-
- while ((i = instr (b, pos, auxf)) >= 0) {
- if (acc && i > pos)
- bstr__memmove (b->data + pos - acc, b->data + pos, i - pos);
- if (auxr->slen)
- bstr__memcpy (b->data + i - acc, auxr->data, auxr->slen);
- acc += delta;
- pos = i + auxf->slen;
- }
-
- if (acc) {
- i = b->slen;
- if (i > pos)
- bstr__memmove (b->data + pos - acc, b->data + pos, i - pos);
- b->slen -= acc;
- b->data[b->slen] = (unsigned char) '\0';
- }
-
- if (auxf != find) bdestroy (auxf);
- if (auxr != repl) bdestroy (auxr);
- return BSTR_OK;
- }
-
- /* expanding replacement since find->slen < repl->slen. Its a lot
- more complicated. This works by first finding all the matches and
- storing them to a growable array, then doing at most one resize of
- the destination bstring and then performing the direct memory transfers
- of the string segment pieces to form the final result. The growable
- array of matches uses a deferred doubling reallocing strategy. What
- this means is that it starts as a reasonably fixed sized auto array in
- the hopes that many if not most cases will never need to grow this
- array. But it switches as soon as the bounds of the array will be
- exceeded. An extra find result is always appended to this array that
- corresponds to the end of the destination string, so slen is checked
- against mlen - 1 rather than mlen before resizing.
- */
-
- mlen = INITIAL_STATIC_FIND_INDEX_COUNT;
- d = (int *) static_d; /* Avoid malloc for trivial/initial cases */
- acc = slen = 0;
-
- while ((pos = instr (b, pos, auxf)) >= 0) {
- if (slen >= mlen - 1) {
- int sl, *t;
-
- mlen += mlen;
- sl = sizeof (int *) * mlen;
- if (static_d == d) d = NULL; /* static_d cannot be realloced */
- if (mlen <= 0 || sl < mlen || NULL == (t = (int *) bstr__realloc (d, sl))) {
- ret = BSTR_ERR;
- goto done;
- }
- if (NULL == d) bstr__memcpy (t, static_d, sizeof (static_d));
- d = t;
- }
- d[slen] = pos;
- slen++;
- acc -= delta;
- pos += auxf->slen;
- if (pos < 0 || acc < 0) {
- ret = BSTR_ERR;
- goto done;
- }
- }
-
- /* slen <= INITIAL_STATIC_INDEX_COUNT-1 or mlen-1 here. */
- d[slen] = b->slen;
-
- if (BSTR_OK == (ret = balloc (b, b->slen + acc + 1))) {
- b->slen += acc;
- for (i = slen-1; i >= 0; i--) {
- int s, l;
- s = d[i] + auxf->slen;
- l = d[i+1] - s; /* d[slen] may be accessed here. */
- if (l) {
- bstr__memmove (b->data + s + acc, b->data + s, l);
- }
- if (auxr->slen) {
- bstr__memmove (b->data + s + acc - auxr->slen,
- auxr->data, auxr->slen);
- }
- acc += delta;
- }
- b->data[b->slen] = (unsigned char) '\0';
- }
-
- done:;
- if (static_d == d) d = NULL;
- bstr__free (d);
- if (auxf != find) bdestroy (auxf);
- if (auxr != repl) bdestroy (auxr);
- return ret;
-}
-
-/* int bfindreplace (bstring b, const_bstring find, const_bstring repl,
- * int pos)
- *
- * Replace all occurrences of a find string with a replace string after a
- * given point in a bstring.
- */
-int bfindreplace (bstring b, const_bstring find, const_bstring repl, int pos) {
- return findreplaceengine (b, find, repl, pos, binstr);
-}
-
-/* int bfindreplacecaseless (bstring b, const_bstring find, const_bstring repl,
- * int pos)
- *
- * Replace all occurrences of a find string, ignoring case, with a replace
- * string after a given point in a bstring.
- */
-int bfindreplacecaseless (bstring b, const_bstring find, const_bstring repl, int pos) {
- return findreplaceengine (b, find, repl, pos, binstrcaseless);
-}
-
-/* int binsertch (bstring b, int pos, int len, unsigned char fill)
- *
- * Inserts the character fill repeatedly into b at position pos for a
- * length len. If the position pos is past the end of b, then the
- * character "fill" is appended as necessary to make up the gap between the
- * end of b and the position pos + len.
- */
-int binsertch (bstring b, int pos, int len, unsigned char fill) {
-int d, l, i;
-
- if (pos < 0 || b == NULL || b->slen < 0 || b->mlen < b->slen ||
- b->mlen <= 0 || len < 0) return BSTR_ERR;
-
- /* Compute the two possible end pointers */
- d = b->slen + len;
- l = pos + len;
- if ((d|l) < 0) return BSTR_ERR;
-
- if (l > d) {
- /* Inserting past the end of the string */
- if (balloc (b, l + 1) != BSTR_OK) return BSTR_ERR;
- pos = b->slen;
- b->slen = l;
- } else {
- /* Inserting in the middle of the string */
- if (balloc (b, d + 1) != BSTR_OK) return BSTR_ERR;
- for (i = d - 1; i >= l; i--) {
- b->data[i] = b->data[i - len];
- }
- b->slen = d;
- }
-
- for (i=pos; i < l; i++) b->data[i] = fill;
- b->data[b->slen] = (unsigned char) '\0';
- return BSTR_OK;
-}
-
-/* int bpattern (bstring b, int len)
- *
- * Replicate the bstring, b in place, end to end repeatedly until it
- * surpasses len characters, then chop the result to exactly len characters.
- * This function operates in-place. The function will return with BSTR_ERR
- * if b is NULL or of length 0, otherwise BSTR_OK is returned.
- */
-int bpattern (bstring b, int len) {
-int i, d;
-
- d = blength (b);
- if (d <= 0 || len < 0 || balloc (b, len + 1) != BSTR_OK) return BSTR_ERR;
- if (len > 0) {
- if (d == 1) return bsetstr (b, len, NULL, b->data[0]);
- for (i = d; i < len; i++) b->data[i] = b->data[i - d];
- }
- b->data[len] = (unsigned char) '\0';
- b->slen = len;
- return BSTR_OK;
-}
-
-#define BS_BUFF_SZ (1024)
-
-/* int breada (bstring b, bNread readPtr, void * parm)
- *
- * Use a finite buffer fread-like function readPtr to concatenate to the
- * bstring b the entire contents of file-like source data in a roughly
- * efficient way.
- */
-int breada (bstring b, bNread readPtr, void * parm) {
-int i, l, n;
-
- if (b == NULL || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen ||
- b->mlen <= 0 || readPtr == NULL) return BSTR_ERR;
-
- i = b->slen;
- for (n=i+16; ; n += ((n < BS_BUFF_SZ) ? n : BS_BUFF_SZ)) {
- if (BSTR_OK != balloc (b, n + 1)) return BSTR_ERR;
- l = (int) readPtr ((void *) (b->data + i), 1, n - i, parm);
- i += l;
- b->slen = i;
- if (i < n) break;
- }
-
- b->data[i] = (unsigned char) '\0';
- return BSTR_OK;
-}
-
-/* bstring bread (bNread readPtr, void * parm)
- *
- * Use a finite buffer fread-like function readPtr to create a bstring
- * filled with the entire contents of file-like source data in a roughly
- * efficient way.
- */
-bstring bread (bNread readPtr, void * parm) {
-bstring buff;
-
- if (0 > breada (buff = bfromcstr (""), readPtr, parm)) {
- bdestroy (buff);
- return NULL;
- }
- return buff;
-}
-
-/* int bassigngets (bstring b, bNgetc getcPtr, void * parm, char terminator)
- *
- * Use an fgetc-like single character stream reading function (getcPtr) to
- * obtain a sequence of characters which are concatenated to the end of the
- * bstring b. The stream read is terminated by the passed in terminator
- * parameter.
- *
- * If getcPtr returns with a negative number, or the terminator character
- * (which is appended) is read, then the stream reading is halted and the
- * function returns with a partial result in b. If there is an empty partial
- * result, 1 is returned. If no characters are read, or there is some other
- * detectable error, BSTR_ERR is returned.
- */
-int bassigngets (bstring b, bNgetc getcPtr, void * parm, char terminator) {
-int c, d, e;
-
- if (b == NULL || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen ||
- b->mlen <= 0 || getcPtr == NULL) return BSTR_ERR;
- d = 0;
- e = b->mlen - 2;
-
- while ((c = getcPtr (parm)) >= 0) {
- if (d > e) {
- b->slen = d;
- if (balloc (b, d + 2) != BSTR_OK) return BSTR_ERR;
- e = b->mlen - 2;
- }
- b->data[d] = (unsigned char) c;
- d++;
- if (c == terminator) break;
- }
-
- b->data[d] = (unsigned char) '\0';
- b->slen = d;
-
- return d == 0 && c < 0;
-}
-
-/* int bgetsa (bstring b, bNgetc getcPtr, void * parm, char terminator)
- *
- * Use an fgetc-like single character stream reading function (getcPtr) to
- * obtain a sequence of characters which are concatenated to the end of the
- * bstring b. The stream read is terminated by the passed in terminator
- * parameter.
- *
- * If getcPtr returns with a negative number, or the terminator character
- * (which is appended) is read, then the stream reading is halted and the
- * function returns with a partial result concatentated to b. If there is
- * an empty partial result, 1 is returned. If no characters are read, or
- * there is some other detectable error, BSTR_ERR is returned.
- */
-int bgetsa (bstring b, bNgetc getcPtr, void * parm, char terminator) {
-int c, d, e;
-
- if (b == NULL || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen ||
- b->mlen <= 0 || getcPtr == NULL) return BSTR_ERR;
- d = b->slen;
- e = b->mlen - 2;
-
- while ((c = getcPtr (parm)) >= 0) {
- if (d > e) {
- b->slen = d;
- if (balloc (b, d + 2) != BSTR_OK) return BSTR_ERR;
- e = b->mlen - 2;
- }
- b->data[d] = (unsigned char) c;
- d++;
- if (c == terminator) break;
- }
-
- b->data[d] = (unsigned char) '\0';
- b->slen = d;
-
- return d == 0 && c < 0;
-}
-
-/* bstring bgets (bNgetc getcPtr, void * parm, char terminator)
- *
- * Use an fgetc-like single character stream reading function (getcPtr) to
- * obtain a sequence of characters which are concatenated into a bstring.
- * The stream read is terminated by the passed in terminator function.
- *
- * If getcPtr returns with a negative number, or the terminator character
- * (which is appended) is read, then the stream reading is halted and the
- * result obtained thus far is returned. If no characters are read, or
- * there is some other detectable error, NULL is returned.
- */
-bstring bgets (bNgetc getcPtr, void * parm, char terminator) {
-bstring buff;
-
- if (0 > bgetsa (buff = bfromcstr (""), getcPtr, parm, terminator) || 0 >= buff->slen) {
- bdestroy (buff);
- buff = NULL;
- }
- return buff;
-}
-
-struct bStream {
- bstring buff; /* Buffer for over-reads */
- void * parm; /* The stream handle for core stream */
- bNread readFnPtr; /* fread compatible fnptr for core stream */
- int isEOF; /* track file's EOF state */
- int maxBuffSz;
-};
-
-/* struct bStream * bsopen (bNread readPtr, void * parm)
- *
- * Wrap a given open stream (described by a fread compatible function
- * pointer and stream handle) into an open bStream suitable for the bstring
- * library streaming functions.
- */
-struct bStream * bsopen (bNread readPtr, void * parm) {
-struct bStream * s;
-
- if (readPtr == NULL) return NULL;
- s = (struct bStream *) bstr__alloc (sizeof (struct bStream));
- if (s == NULL) return NULL;
- s->parm = parm;
- s->buff = bfromcstr ("");
- s->readFnPtr = readPtr;
- s->maxBuffSz = BS_BUFF_SZ;
- s->isEOF = 0;
- return s;
-}
-
-/* int bsbufflength (struct bStream * s, int sz)
- *
- * Set the length of the buffer used by the bStream. If sz is zero, the
- * length is not set. This function returns with the previous length.
- */
-int bsbufflength (struct bStream * s, int sz) {
-int oldSz;
- if (s == NULL || sz < 0) return BSTR_ERR;
- oldSz = s->maxBuffSz;
- if (sz > 0) s->maxBuffSz = sz;
- return oldSz;
-}
-
-int bseof (const struct bStream * s) {
- if (s == NULL || s->readFnPtr == NULL) return BSTR_ERR;
- return s->isEOF && (s->buff->slen == 0);
-}
-
-/* void * bsclose (struct bStream * s)
- *
- * Close the bStream, and return the handle to the stream that was originally
- * used to open the given stream.
- */
-void * bsclose (struct bStream * s) {
-void * parm;
- if (s == NULL) return NULL;
- s->readFnPtr = NULL;
- if (s->buff) bdestroy (s->buff);
- s->buff = NULL;
- parm = s->parm;
- s->parm = NULL;
- s->isEOF = 1;
- bstr__free (s);
- return parm;
-}
-
-/* int bsreadlna (bstring r, struct bStream * s, char terminator)
- *
- * Read a bstring terminated by the terminator character or the end of the
- * stream from the bStream (s) and return it into the parameter r. This
- * function may read additional characters from the core stream that are not
- * returned, but will be retained for subsequent read operations.
- */
-int bsreadlna (bstring r, struct bStream * s, char terminator) {
-int i, l, ret, rlo;
-char * b;
-struct tagbstring x;
-
- if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0 ||
- r->slen < 0 || r->mlen < r->slen) return BSTR_ERR;
- l = s->buff->slen;
- if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR;
- b = (char *) s->buff->data;
- x.data = (unsigned char *) b;
-
- /* First check if the current buffer holds the terminator */
- b[l] = terminator; /* Set sentinel */
- for (i=0; b[i] != terminator; i++) ;
- if (i < l) {
- x.slen = i + 1;
- ret = bconcat (r, &x);
- s->buff->slen = l;
- if (BSTR_OK == ret) bdelete (s->buff, 0, i + 1);
- return BSTR_OK;
- }
-
- rlo = r->slen;
-
- /* If not then just concatenate the entire buffer to the output */
- x.slen = l;
- if (BSTR_OK != bconcat (r, &x)) return BSTR_ERR;
-
- /* Perform direct in-place reads into the destination to allow for
- the minimum of data-copies */
- for (;;) {
- if (BSTR_OK != balloc (r, r->slen + s->maxBuffSz + 1)) return BSTR_ERR;
- b = (char *) (r->data + r->slen);
- l = (int) s->readFnPtr (b, 1, s->maxBuffSz, s->parm);
- if (l <= 0) {
- r->data[r->slen] = (unsigned char) '\0';
- s->buff->slen = 0;
- s->isEOF = 1;
- /* If nothing was read return with an error message */
- return BSTR_ERR & -(r->slen == rlo);
- }
- b[l] = terminator; /* Set sentinel */
- for (i=0; b[i] != terminator; i++) ;
- if (i < l) break;
- r->slen += l;
- }
-
- /* Terminator found, push over-read back to buffer */
- i++;
- r->slen += i;
- s->buff->slen = l - i;
- bstr__memcpy (s->buff->data, b + i, l - i);
- r->data[r->slen] = (unsigned char) '\0';
- return BSTR_OK;
-}
-
-/* int bsreadlnsa (bstring r, struct bStream * s, bstring term)
- *
- * Read a bstring terminated by any character in the term string or the end
- * of the stream from the bStream (s) and return it into the parameter r.
- * This function may read additional characters from the core stream that
- * are not returned, but will be retained for subsequent read operations.
- */
-int bsreadlnsa (bstring r, struct bStream * s, const_bstring term) {
-int i, l, ret, rlo;
-unsigned char * b;
-struct tagbstring x;
-struct charField cf;
-
- if (s == NULL || s->buff == NULL || r == NULL || term == NULL ||
- term->data == NULL || r->mlen <= 0 || r->slen < 0 ||
- r->mlen < r->slen) return BSTR_ERR;
- if (term->slen == 1) return bsreadlna (r, s, term->data[0]);
- if (term->slen < 1 || buildCharField (&cf, term)) return BSTR_ERR;
-
- l = s->buff->slen;
- if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR;
- b = (unsigned char *) s->buff->data;
- x.data = b;
-
- /* First check if the current buffer holds the terminator */
- b[l] = term->data[0]; /* Set sentinel */
- for (i=0; !testInCharField (&cf, b[i]); i++) ;
- if (i < l) {
- x.slen = i + 1;
- ret = bconcat (r, &x);
- s->buff->slen = l;
- if (BSTR_OK == ret) bdelete (s->buff, 0, i + 1);
- return BSTR_OK;
- }
-
- rlo = r->slen;
-
- /* If not then just concatenate the entire buffer to the output */
- x.slen = l;
- if (BSTR_OK != bconcat (r, &x)) return BSTR_ERR;
-
- /* Perform direct in-place reads into the destination to allow for
- the minimum of data-copies */
- for (;;) {
- if (BSTR_OK != balloc (r, r->slen + s->maxBuffSz + 1)) return BSTR_ERR;
- b = (unsigned char *) (r->data + r->slen);
- l = (int) s->readFnPtr (b, 1, s->maxBuffSz, s->parm);
- if (l <= 0) {
- r->data[r->slen] = (unsigned char) '\0';
- s->buff->slen = 0;
- s->isEOF = 1;
- /* If nothing was read return with an error message */
- return BSTR_ERR & -(r->slen == rlo);
- }
-
- b[l] = term->data[0]; /* Set sentinel */
- for (i=0; !testInCharField (&cf, b[i]); i++) ;
- if (i < l) break;
- r->slen += l;
- }
-
- /* Terminator found, push over-read back to buffer */
- i++;
- r->slen += i;
- s->buff->slen = l - i;
- bstr__memcpy (s->buff->data, b + i, l - i);
- r->data[r->slen] = (unsigned char) '\0';
- return BSTR_OK;
-}
-
-/* int bsreada (bstring r, struct bStream * s, int n)
- *
- * Read a bstring of length n (or, if it is fewer, as many bytes as is
- * remaining) from the bStream. This function may read additional
- * characters from the core stream that are not returned, but will be
- * retained for subsequent read operations. This function will not read
- * additional characters from the core stream beyond virtual stream pointer.
- */
-int bsreada (bstring r, struct bStream * s, int n) {
-int l, ret, orslen;
-char * b;
-struct tagbstring x;
-
- if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0
- || r->slen < 0 || r->mlen < r->slen || n <= 0) return BSTR_ERR;
-
- n += r->slen;
- if (n <= 0) return BSTR_ERR;
-
- l = s->buff->slen;
-
- orslen = r->slen;
-
- if (0 == l) {
- if (s->isEOF) return BSTR_ERR;
- if (r->mlen > n) {
- l = (int) s->readFnPtr (r->data + r->slen, 1, n - r->slen, s->parm);
- if (0 >= l || l > n - r->slen) {
- s->isEOF = 1;
- return BSTR_ERR;
- }
- r->slen += l;
- r->data[r->slen] = (unsigned char) '\0';
- return 0;
- }
- }
-
- if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR;
- b = (char *) s->buff->data;
- x.data = (unsigned char *) b;
-
- do {
- if (l + r->slen >= n) {
- x.slen = n - r->slen;
- ret = bconcat (r, &x);
- s->buff->slen = l;
- if (BSTR_OK == ret) bdelete (s->buff, 0, x.slen);
- return BSTR_ERR & -(r->slen == orslen);
- }
-
- x.slen = l;
- if (BSTR_OK != bconcat (r, &x)) break;
-
- l = n - r->slen;
- if (l > s->maxBuffSz) l = s->maxBuffSz;
-
- l = (int) s->readFnPtr (b, 1, l, s->parm);
-
- } while (l > 0);
- if (l < 0) l = 0;
- if (l == 0) s->isEOF = 1;
- s->buff->slen = l;
- return BSTR_ERR & -(r->slen == orslen);
-}
-
-/* int bsreadln (bstring r, struct bStream * s, char terminator)
- *
- * Read a bstring terminated by the terminator character or the end of the
- * stream from the bStream (s) and return it into the parameter r. This
- * function may read additional characters from the core stream that are not
- * returned, but will be retained for subsequent read operations.
- */
-int bsreadln (bstring r, struct bStream * s, char terminator) {
- if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0)
- return BSTR_ERR;
- if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR;
- r->slen = 0;
- return bsreadlna (r, s, terminator);
-}
-
-/* int bsreadlns (bstring r, struct bStream * s, bstring term)
- *
- * Read a bstring terminated by any character in the term string or the end
- * of the stream from the bStream (s) and return it into the parameter r.
- * This function may read additional characters from the core stream that
- * are not returned, but will be retained for subsequent read operations.
- */
-int bsreadlns (bstring r, struct bStream * s, const_bstring term) {
- if (s == NULL || s->buff == NULL || r == NULL || term == NULL
- || term->data == NULL || r->mlen <= 0) return BSTR_ERR;
- if (term->slen == 1) return bsreadln (r, s, term->data[0]);
- if (term->slen < 1) return BSTR_ERR;
- if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR;
- r->slen = 0;
- return bsreadlnsa (r, s, term);
-}
-
-/* int bsread (bstring r, struct bStream * s, int n)
- *
- * Read a bstring of length n (or, if it is fewer, as many bytes as is
- * remaining) from the bStream. This function may read additional
- * characters from the core stream that are not returned, but will be
- * retained for subsequent read operations. This function will not read
- * additional characters from the core stream beyond virtual stream pointer.
- */
-int bsread (bstring r, struct bStream * s, int n) {
- if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0
- || n <= 0) return BSTR_ERR;
- if (BSTR_OK != balloc (s->buff, s->maxBuffSz + 1)) return BSTR_ERR;
- r->slen = 0;
- return bsreada (r, s, n);
-}
-
-/* int bsunread (struct bStream * s, const_bstring b)
- *
- * Insert a bstring into the bStream at the current position. These
- * characters will be read prior to those that actually come from the core
- * stream.
- */
-int bsunread (struct bStream * s, const_bstring b) {
- if (s == NULL || s->buff == NULL) return BSTR_ERR;
- return binsert (s->buff, 0, b, (unsigned char) '?');
-}
-
-/* int bspeek (bstring r, const struct bStream * s)
- *
- * Return the currently buffered characters from the bStream that will be
- * read prior to reads from the core stream.
- */
-int bspeek (bstring r, const struct bStream * s) {
- if (s == NULL || s->buff == NULL) return BSTR_ERR;
- return bassign (r, s->buff);
-}
-
-/* bstring bjoin (const struct bstrList * bl, const_bstring sep);
- *
- * Join the entries of a bstrList into one bstring by sequentially
- * concatenating them with the sep string in between. If there is an error
- * NULL is returned, otherwise a bstring with the correct result is returned.
- */
-bstring bjoin (const struct bstrList * bl, const_bstring sep) {
-bstring b;
-int i, c, v;
-
- if (bl == NULL || bl->qty < 0) return NULL;
- if (sep != NULL && (sep->slen < 0 || sep->data == NULL)) return NULL;
-
- for (i = 0, c = 1; i < bl->qty; i++) {
- v = bl->entry[i]->slen;
- if (v < 0) return NULL; /* Invalid input */
- c += v;
- if (c < 0) return NULL; /* Wrap around ?? */
- }
-
- if (sep != NULL) c += (bl->qty - 1) * sep->slen;
-
- b = (bstring) bstr__alloc (sizeof (struct tagbstring));
- if (NULL == b) return NULL; /* Out of memory */
- b->data = (unsigned char *) bstr__alloc (c);
- if (b->data == NULL) {
- bstr__free (b);
- return NULL;
- }
-
- b->mlen = c;
- b->slen = c-1;
-
- for (i = 0, c = 0; i < bl->qty; i++) {
- if (i > 0 && sep != NULL) {
- bstr__memcpy (b->data + c, sep->data, sep->slen);
- c += sep->slen;
- }
- v = bl->entry[i]->slen;
- bstr__memcpy (b->data + c, bl->entry[i]->data, v);
- c += v;
- }
- b->data[c] = (unsigned char) '\0';
- return b;
-}
-
-#define BSSSC_BUFF_LEN (256)
-
-/* int bssplitscb (struct bStream * s, const_bstring splitStr,
- * int (* cb) (void * parm, int ofs, const_bstring entry), void * parm)
- *
- * Iterate the set of disjoint sequential substrings read from a stream
- * divided by any of the characters in splitStr. An empty splitStr causes
- * the whole stream to be iterated once.
- *
- * Note: At the point of calling the cb function, the bStream pointer is
- * pointed exactly at the position right after having read the split
- * character. The cb function can act on the stream by causing the bStream
- * pointer to move, and bssplitscb will continue by starting the next split
- * at the position of the pointer after the return from cb.
- *
- * However, if the cb causes the bStream s to be destroyed then the cb must
- * return with a negative value, otherwise bssplitscb will continue in an
- * undefined manner.
- */
-int bssplitscb (struct bStream * s, const_bstring splitStr,
- int (* cb) (void * parm, int ofs, const_bstring entry), void * parm) {
-struct charField chrs;
-bstring buff;
-int i, p, ret;
-
- if (cb == NULL || s == NULL || s->readFnPtr == NULL
- || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR;
-
- if (NULL == (buff = bfromcstr (""))) return BSTR_ERR;
-
- if (splitStr->slen == 0) {
- while (bsreada (buff, s, BSSSC_BUFF_LEN) >= 0) ;
- if ((ret = cb (parm, 0, buff)) > 0)
- ret = 0;
- } else {
- buildCharField (&chrs, splitStr);
- ret = p = i = 0;
- for (;;) {
- if (i >= buff->slen) {
- bsreada (buff, s, BSSSC_BUFF_LEN);
- if (i >= buff->slen) {
- if (0 < (ret = cb (parm, p, buff))) ret = 0;
- break;
- }
- }
- if (testInCharField (&chrs, buff->data[i])) {
- struct tagbstring t;
- unsigned char c;
-
- blk2tbstr (t, buff->data + i + 1, buff->slen - (i + 1));
- if ((ret = bsunread (s, &t)) < 0) break;
- buff->slen = i;
- c = buff->data[i];
- buff->data[i] = (unsigned char) '\0';
- if ((ret = cb (parm, p, buff)) < 0) break;
- buff->data[i] = c;
- buff->slen = 0;
- p += i + 1;
- i = -1;
- }
- i++;
- }
- }
-
- bdestroy (buff);
- return ret;
-}
-
-/* int bssplitstrcb (struct bStream * s, const_bstring splitStr,
- * int (* cb) (void * parm, int ofs, const_bstring entry), void * parm)
- *
- * Iterate the set of disjoint sequential substrings read from a stream
- * divided by the entire substring splitStr. An empty splitStr causes
- * each character of the stream to be iterated.
- *
- * Note: At the point of calling the cb function, the bStream pointer is
- * pointed exactly at the position right after having read the split
- * character. The cb function can act on the stream by causing the bStream
- * pointer to move, and bssplitscb will continue by starting the next split
- * at the position of the pointer after the return from cb.
- *
- * However, if the cb causes the bStream s to be destroyed then the cb must
- * return with a negative value, otherwise bssplitscb will continue in an
- * undefined manner.
- */
-int bssplitstrcb (struct bStream * s, const_bstring splitStr,
- int (* cb) (void * parm, int ofs, const_bstring entry), void * parm) {
-bstring buff;
-int i, p, ret;
-
- if (cb == NULL || s == NULL || s->readFnPtr == NULL
- || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR;
-
- if (splitStr->slen == 1) return bssplitscb (s, splitStr, cb, parm);
-
- if (NULL == (buff = bfromcstr (""))) return BSTR_ERR;
-
- if (splitStr->slen == 0) {
- for (i=0; bsreada (buff, s, BSSSC_BUFF_LEN) >= 0; i++) {
- if ((ret = cb (parm, 0, buff)) < 0) {
- bdestroy (buff);
- return ret;
- }
- buff->slen = 0;
- }
- return BSTR_OK;
- } else {
- ret = p = i = 0;
- for (i=p=0;;) {
- if ((ret = binstr (buff, 0, splitStr)) >= 0) {
- struct tagbstring t;
- blk2tbstr (t, buff->data, ret);
- i = ret + splitStr->slen;
- if ((ret = cb (parm, p, &t)) < 0) break;
- p += i;
- bdelete (buff, 0, i);
- } else {
- bsreada (buff, s, BSSSC_BUFF_LEN);
- if (bseof (s)) {
- if ((ret = cb (parm, p, buff)) > 0) ret = 0;
- break;
- }
- }
- }
- }
-
- bdestroy (buff);
- return ret;
-}
-
-/* int bstrListCreate (void)
- *
- * Create a bstrList.
- */
-struct bstrList * bstrListCreate (void) {
-struct bstrList * sl = (struct bstrList *) bstr__alloc (sizeof (struct bstrList));
- if (sl) {
- sl->entry = (bstring *) bstr__alloc (1*sizeof (bstring));
- if (!sl->entry) {
- bstr__free (sl);
- sl = NULL;
- } else {
- sl->qty = 0;
- sl->mlen = 1;
- }
- }
- return sl;
-}
-
-/* int bstrListDestroy (struct bstrList * sl)
- *
- * Destroy a bstrList that has been created by bsplit, bsplits or bstrListCreate.
- */
-int bstrListDestroy (struct bstrList * sl) {
-int i;
- if (sl == NULL || sl->qty < 0) return BSTR_ERR;
- for (i=0; i < sl->qty; i++) {
- if (sl->entry[i]) {
- bdestroy (sl->entry[i]);
- sl->entry[i] = NULL;
- }
- }
- sl->qty = -1;
- sl->mlen = -1;
- bstr__free (sl->entry);
- sl->entry = NULL;
- bstr__free (sl);
- return BSTR_OK;
-}
-
-/* int bstrListAlloc (struct bstrList * sl, int msz)
- *
- * Ensure that there is memory for at least msz number of entries for the
- * list.
- */
-int bstrListAlloc (struct bstrList * sl, int msz) {
-bstring * l;
-int smsz;
-size_t nsz;
- if (!sl || msz <= 0 || !sl->entry || sl->qty < 0 || sl->mlen <= 0 || sl->qty > sl->mlen) return BSTR_ERR;
- if (sl->mlen >= msz) return BSTR_OK;
- smsz = snapUpSize (msz);
- nsz = ((size_t) smsz) * sizeof (bstring);
- if (nsz < (size_t) smsz) return BSTR_ERR;
- l = (bstring *) bstr__realloc (sl->entry, nsz);
- if (!l) {
- smsz = msz;
- nsz = ((size_t) smsz) * sizeof (bstring);
- l = (bstring *) bstr__realloc (sl->entry, nsz);
- if (!l) return BSTR_ERR;
- }
- sl->mlen = smsz;
- sl->entry = l;
- return BSTR_OK;
-}
-
-/* int bstrListAllocMin (struct bstrList * sl, int msz)
- *
- * Try to allocate the minimum amount of memory for the list to include at
- * least msz entries or sl->qty whichever is greater.
- */
-int bstrListAllocMin (struct bstrList * sl, int msz) {
-bstring * l;
-size_t nsz;
- if (!sl || msz <= 0 || !sl->entry || sl->qty < 0 || sl->mlen <= 0 || sl->qty > sl->mlen) return BSTR_ERR;
- if (msz < sl->qty) msz = sl->qty;
- if (sl->mlen == msz) return BSTR_OK;
- nsz = ((size_t) msz) * sizeof (bstring);
- if (nsz < (size_t) msz) return BSTR_ERR;
- l = (bstring *) bstr__realloc (sl->entry, nsz);
- if (!l) return BSTR_ERR;
- sl->mlen = msz;
- sl->entry = l;
- return BSTR_OK;
-}
-
-/* int bsplitcb (const_bstring str, unsigned char splitChar, int pos,
- * int (* cb) (void * parm, int ofs, int len), void * parm)
- *
- * Iterate the set of disjoint sequential substrings over str divided by the
- * character in splitChar.
- *
- * Note: Non-destructive modification of str from within the cb function
- * while performing this split is not undefined. bsplitcb behaves in
- * sequential lock step with calls to cb. I.e., after returning from a cb
- * that return a non-negative integer, bsplitcb continues from the position
- * 1 character after the last detected split character and it will halt
- * immediately if the length of str falls below this point. However, if the
- * cb function destroys str, then it *must* return with a negative value,
- * otherwise bsplitcb will continue in an undefined manner.
- */
-int bsplitcb (const_bstring str, unsigned char splitChar, int pos,
- int (* cb) (void * parm, int ofs, int len), void * parm) {
-int i, p, ret;
-
- if (cb == NULL || str == NULL || pos < 0 || pos > str->slen)
- return BSTR_ERR;
-
- p = pos;
- do {
- for (i=p; i < str->slen; i++) {
- if (str->data[i] == splitChar) break;
- }
- if ((ret = cb (parm, p, i - p)) < 0) return ret;
- p = i + 1;
- } while (p <= str->slen);
- return BSTR_OK;
-}
-
-/* int bsplitscb (const_bstring str, const_bstring splitStr, int pos,
- * int (* cb) (void * parm, int ofs, int len), void * parm)
- *
- * Iterate the set of disjoint sequential substrings over str divided by any
- * of the characters in splitStr. An empty splitStr causes the whole str to
- * be iterated once.
- *
- * Note: Non-destructive modification of str from within the cb function
- * while performing this split is not undefined. bsplitscb behaves in
- * sequential lock step with calls to cb. I.e., after returning from a cb
- * that return a non-negative integer, bsplitscb continues from the position
- * 1 character after the last detected split character and it will halt
- * immediately if the length of str falls below this point. However, if the
- * cb function destroys str, then it *must* return with a negative value,
- * otherwise bsplitscb will continue in an undefined manner.
- */
-int bsplitscb (const_bstring str, const_bstring splitStr, int pos,
- int (* cb) (void * parm, int ofs, int len), void * parm) {
-struct charField chrs;
-int i, p, ret;
-
- if (cb == NULL || str == NULL || pos < 0 || pos > str->slen
- || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR;
- if (splitStr->slen == 0) {
- if ((ret = cb (parm, 0, str->slen)) > 0) ret = 0;
- return ret;
- }
-
- if (splitStr->slen == 1)
- return bsplitcb (str, splitStr->data[0], pos, cb, parm);
-
- buildCharField (&chrs, splitStr);
-
- p = pos;
- do {
- for (i=p; i < str->slen; i++) {
- if (testInCharField (&chrs, str->data[i])) break;
- }
- if ((ret = cb (parm, p, i - p)) < 0) return ret;
- p = i + 1;
- } while (p <= str->slen);
- return BSTR_OK;
-}
-
-/* int bsplitstrcb (const_bstring str, const_bstring splitStr, int pos,
- * int (* cb) (void * parm, int ofs, int len), void * parm)
- *
- * Iterate the set of disjoint sequential substrings over str divided by the
- * substring splitStr. An empty splitStr causes the whole str to be
- * iterated once.
- *
- * Note: Non-destructive modification of str from within the cb function
- * while performing this split is not undefined. bsplitstrcb behaves in
- * sequential lock step with calls to cb. I.e., after returning from a cb
- * that return a non-negative integer, bsplitscb continues from the position
- * 1 character after the last detected split character and it will halt
- * immediately if the length of str falls below this point. However, if the
- * cb function destroys str, then it *must* return with a negative value,
- * otherwise bsplitscb will continue in an undefined manner.
- */
-int bsplitstrcb (const_bstring str, const_bstring splitStr, int pos,
- int (* cb) (void * parm, int ofs, int len), void * parm) {
-int i, p, ret;
-
- if (cb == NULL || str == NULL || pos < 0 || pos > str->slen
- || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR;
-
- if (0 == splitStr->slen) {
- for (i=pos; i < str->slen; i++) {
- if ((ret = cb (parm, i, 1)) < 0) return ret;
- }
- return BSTR_OK;
- }
-
- if (splitStr->slen == 1)
- return bsplitcb (str, splitStr->data[0], pos, cb, parm);
-
- for (i=p=pos; i <= str->slen - splitStr->slen; i++) {
- if (0 == bstr__memcmp (splitStr->data, str->data + i, splitStr->slen)) {
- if ((ret = cb (parm, p, i - p)) < 0) return ret;
- i += splitStr->slen;
- p = i;
- }
- }
- if ((ret = cb (parm, p, str->slen - p)) < 0) return ret;
- return BSTR_OK;
-}
-
-struct genBstrList {
- bstring b;
- struct bstrList * bl;
-};
-
-static int bscb (void * parm, int ofs, int len) {
-struct genBstrList * g = (struct genBstrList *) parm;
- if (g->bl->qty >= g->bl->mlen) {
- int mlen = g->bl->mlen * 2;
- bstring * tbl;
-
- while (g->bl->qty >= mlen) {
- if (mlen < g->bl->mlen) return BSTR_ERR;
- mlen += mlen;
- }
-
- tbl = (bstring *) bstr__realloc (g->bl->entry, sizeof (bstring) * mlen);
- if (tbl == NULL) return BSTR_ERR;
-
- g->bl->entry = tbl;
- g->bl->mlen = mlen;
- }
-
- g->bl->entry[g->bl->qty] = bmidstr (g->b, ofs, len);
- g->bl->qty++;
- return BSTR_OK;
-}
-
-/* struct bstrList * bsplit (const_bstring str, unsigned char splitChar)
- *
- * Create an array of sequential substrings from str divided by the character
- * splitChar.
- */
-struct bstrList * bsplit (const_bstring str, unsigned char splitChar) {
-struct genBstrList g;
-
- if (str == NULL || str->data == NULL || str->slen < 0) return NULL;
-
- g.bl = (struct bstrList *) bstr__alloc (sizeof (struct bstrList));
- if (g.bl == NULL) return NULL;
- g.bl->mlen = 4;
- g.bl->entry = (bstring *) bstr__alloc (g.bl->mlen * sizeof (bstring));
- if (NULL == g.bl->entry) {
- bstr__free (g.bl);
- return NULL;
- }
-
- g.b = (bstring) str;
- g.bl->qty = 0;
- if (bsplitcb (str, splitChar, 0, bscb, &g) < 0) {
- bstrListDestroy (g.bl);
- return NULL;
- }
- return g.bl;
-}
-
-/* struct bstrList * bsplitstr (const_bstring str, const_bstring splitStr)
- *
- * Create an array of sequential substrings from str divided by the entire
- * substring splitStr.
- */
-struct bstrList * bsplitstr (const_bstring str, const_bstring splitStr) {
-struct genBstrList g;
-
- if (str == NULL || str->data == NULL || str->slen < 0) return NULL;
-
- g.bl = (struct bstrList *) bstr__alloc (sizeof (struct bstrList));
- if (g.bl == NULL) return NULL;
- g.bl->mlen = 4;
- g.bl->entry = (bstring *) bstr__alloc (g.bl->mlen * sizeof (bstring));
- if (NULL == g.bl->entry) {
- bstr__free (g.bl);
- return NULL;
- }
-
- g.b = (bstring) str;
- g.bl->qty = 0;
- if (bsplitstrcb (str, splitStr, 0, bscb, &g) < 0) {
- bstrListDestroy (g.bl);
- return NULL;
- }
- return g.bl;
-}
-
-/* struct bstrList * bsplits (const_bstring str, bstring splitStr)
- *
- * Create an array of sequential substrings from str divided by any of the
- * characters in splitStr. An empty splitStr causes a single entry bstrList
- * containing a copy of str to be returned.
- */
-struct bstrList * bsplits (const_bstring str, const_bstring splitStr) {
-struct genBstrList g;
-
- if ( str == NULL || str->slen < 0 || str->data == NULL ||
- splitStr == NULL || splitStr->slen < 0 || splitStr->data == NULL)
- return NULL;
-
- g.bl = (struct bstrList *) bstr__alloc (sizeof (struct bstrList));
- if (g.bl == NULL) return NULL;
- g.bl->mlen = 4;
- g.bl->entry = (bstring *) bstr__alloc (g.bl->mlen * sizeof (bstring));
- if (NULL == g.bl->entry) {
- bstr__free (g.bl);
- return NULL;
- }
- g.b = (bstring) str;
- g.bl->qty = 0;
-
- if (bsplitscb (str, splitStr, 0, bscb, &g) < 0) {
- bstrListDestroy (g.bl);
- return NULL;
- }
- return g.bl;
-}
-
-#if defined (__TURBOC__) && !defined (__BORLANDC__)
-# ifndef BSTRLIB_NOVSNP
-# define BSTRLIB_NOVSNP
-# endif
-#endif
-
-/* Give WATCOM C/C++, MSVC some latitude for their non-support of vsnprintf */
-#if defined(__WATCOMC__) || defined(_MSC_VER)
-#define exvsnprintf(r,b,n,f,a) {r = _vsnprintf (b,n,f,a);}
-#else
-#ifdef BSTRLIB_NOVSNP
-/* This is just a hack. If you are using a system without a vsnprintf, it is
- not recommended that bformat be used at all. */
-#define exvsnprintf(r,b,n,f,a) {vsprintf (b,f,a); r = -1;}
-#define START_VSNBUFF (256)
-#else
-
-#if defined(__GNUC__) && !defined(__APPLE__)
-/* Something is making gcc complain about this prototype not being here, so
- I've just gone ahead and put it in. */
-extern int vsnprintf (char *buf, size_t count, const char *format, va_list arg);
-#endif
-
-#define exvsnprintf(r,b,n,f,a) {r = vsnprintf (b,n,f,a);}
-#endif
-#endif
-
-#if !defined (BSTRLIB_NOVSNP)
-
-#ifndef START_VSNBUFF
-#define START_VSNBUFF (16)
-#endif
-
-/* On IRIX vsnprintf returns n-1 when the operation would overflow the target
- buffer, WATCOM and MSVC both return -1, while C99 requires that the
- returned value be exactly what the length would be if the buffer would be
- large enough. This leads to the idea that if the return value is larger
- than n, then changing n to the return value will reduce the number of
- iterations required. */
-
-/* int bformata (bstring b, const char * fmt, ...)
- *
- * After the first parameter, it takes the same parameters as printf (), but
- * rather than outputting results to stdio, it appends the results to
- * a bstring which contains what would have been output. Note that if there
- * is an early generation of a '\0' character, the bstring will be truncated
- * to this end point.
- */
-int bformata (bstring b, const char * fmt, ...) {
-va_list arglist;
-bstring buff;
-int n, r;
-
- if (b == NULL || fmt == NULL || b->data == NULL || b->mlen <= 0
- || b->slen < 0 || b->slen > b->mlen) return BSTR_ERR;
-
- /* Since the length is not determinable beforehand, a search is
- performed using the truncating "vsnprintf" call (to avoid buffer
- overflows) on increasing potential sizes for the output result. */
-
- if ((n = (int) (2*strlen (fmt))) < START_VSNBUFF) n = START_VSNBUFF;
- if (NULL == (buff = bfromcstralloc (n + 2, ""))) {
- n = 1;
- if (NULL == (buff = bfromcstralloc (n + 2, ""))) return BSTR_ERR;
- }
-
- for (;;) {
- va_start (arglist, fmt);
- exvsnprintf (r, (char *) buff->data, n + 1, fmt, arglist);
- va_end (arglist);
-
- buff->data[n] = (unsigned char) '\0';
- buff->slen = (int) (strlen) ((char *) buff->data);
-
- if (buff->slen < n) break;
-
- if (r > n) n = r; else n += n;
-
- if (BSTR_OK != balloc (buff, n + 2)) {
- bdestroy (buff);
- return BSTR_ERR;
- }
- }
-
- r = bconcat (b, buff);
- bdestroy (buff);
- return r;
-}
-
-/* int bassignformat (bstring b, const char * fmt, ...)
- *
- * After the first parameter, it takes the same parameters as printf (), but
- * rather than outputting results to stdio, it outputs the results to
- * the bstring parameter b. Note that if there is an early generation of a
- * '\0' character, the bstring will be truncated to this end point.
- */
-int bassignformat (bstring b, const char * fmt, ...) {
-va_list arglist;
-bstring buff;
-int n, r;
-
- if (b == NULL || fmt == NULL || b->data == NULL || b->mlen <= 0
- || b->slen < 0 || b->slen > b->mlen) return BSTR_ERR;
-
- /* Since the length is not determinable beforehand, a search is
- performed using the truncating "vsnprintf" call (to avoid buffer
- overflows) on increasing potential sizes for the output result. */
-
- if ((n = (int) (2*strlen (fmt))) < START_VSNBUFF) n = START_VSNBUFF;
- if (NULL == (buff = bfromcstralloc (n + 2, ""))) {
- n = 1;
- if (NULL == (buff = bfromcstralloc (n + 2, ""))) return BSTR_ERR;
- }
-
- for (;;) {
- va_start (arglist, fmt);
- exvsnprintf (r, (char *) buff->data, n + 1, fmt, arglist);
- va_end (arglist);
-
- buff->data[n] = (unsigned char) '\0';
- buff->slen = (int) (strlen) ((char *) buff->data);
-
- if (buff->slen < n) break;
-
- if (r > n) n = r; else n += n;
-
- if (BSTR_OK != balloc (buff, n + 2)) {
- bdestroy (buff);
- return BSTR_ERR;
- }
- }
-
- r = bassign (b, buff);
- bdestroy (buff);
- return r;
-}
-
-/* bstring bformat (const char * fmt, ...)
- *
- * Takes the same parameters as printf (), but rather than outputting results
- * to stdio, it forms a bstring which contains what would have been output.
- * Note that if there is an early generation of a '\0' character, the
- * bstring will be truncated to this end point.
- */
-bstring bformat (const char * fmt, ...) {
-va_list arglist;
-bstring buff;
-int n, r;
-
- if (fmt == NULL) return NULL;
-
- /* Since the length is not determinable beforehand, a search is
- performed using the truncating "vsnprintf" call (to avoid buffer
- overflows) on increasing potential sizes for the output result. */
-
- if ((n = (int) (2*strlen (fmt))) < START_VSNBUFF) n = START_VSNBUFF;
- if (NULL == (buff = bfromcstralloc (n + 2, ""))) {
- n = 1;
- if (NULL == (buff = bfromcstralloc (n + 2, ""))) return NULL;
- }
-
- for (;;) {
- va_start (arglist, fmt);
- exvsnprintf (r, (char *) buff->data, n + 1, fmt, arglist);
- va_end (arglist);
-
- buff->data[n] = (unsigned char) '\0';
- buff->slen = (int) (strlen) ((char *) buff->data);
-
- if (buff->slen < n) break;
-
- if (r > n) n = r; else n += n;
-
- if (BSTR_OK != balloc (buff, n + 2)) {
- bdestroy (buff);
- return NULL;
- }
- }
-
- return buff;
-}
-
-/* int bvcformata (bstring b, int count, const char * fmt, va_list arglist)
- *
- * The bvcformata function formats data under control of the format control
- * string fmt and attempts to append the result to b. The fmt parameter is
- * the same as that of the printf function. The variable argument list is
- * replaced with arglist, which has been initialized by the va_start macro.
- * The size of the appended output is upper bounded by count. If the
- * required output exceeds count, the string b is not augmented with any
- * contents and a value below BSTR_ERR is returned. If a value below -count
- * is returned then it is recommended that the negative of this value be
- * used as an update to the count in a subsequent pass. On other errors,
- * such as running out of memory, parameter errors or numeric wrap around
- * BSTR_ERR is returned. BSTR_OK is returned when the output is successfully
- * generated and appended to b.
- *
- * Note: There is no sanity checking of arglist, and this function is
- * destructive of the contents of b from the b->slen point onward. If there
- * is an early generation of a '\0' character, the bstring will be truncated
- * to this end point.
- */
-int bvcformata (bstring b, int count, const char * fmt, va_list arg) {
-int n, r, l;
-
- if (b == NULL || fmt == NULL || count <= 0 || b->data == NULL
- || b->mlen <= 0 || b->slen < 0 || b->slen > b->mlen) return BSTR_ERR;
-
- if (count > (n = b->slen + count) + 2) return BSTR_ERR;
- if (BSTR_OK != balloc (b, n + 2)) return BSTR_ERR;
-
- exvsnprintf (r, (char *) b->data + b->slen, count + 2, fmt, arg);
-
- /* Did the operation complete successfully within bounds? */
- for (l = b->slen; l <= n; l++) {
- if ('\0' == b->data[l]) {
- b->slen = l;
- return BSTR_OK;
- }
- }
-
- /* Abort, since the buffer was not large enough. The return value
- tries to help set what the retry length should be. */
-
- b->data[b->slen] = '\0';
- if (r > count + 1) { /* Does r specify a particular target length? */
- n = r;
- } else {
- n = count + count; /* If not, just double the size of count */
- if (count > n) n = INT_MAX;
- }
- n = -n;
-
- if (n > BSTR_ERR-1) n = BSTR_ERR-1;
- return n;
-}
-
-#endif
diff --git a/src/bstrlib.h b/src/bstrlib.h
deleted file mode 100644
index c8fa694..0000000
--- a/src/bstrlib.h
+++ /dev/null
@@ -1,304 +0,0 @@
-/*
- * This source file is part of the bstring string library. This code was
- * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause
- * BSD open source license or GPL v2.0. Refer to the accompanying documentation
- * for details on usage and license.
- */
-
-/*
- * bstrlib.h
- *
- * This file is the header file for the core module for implementing the
- * bstring functions.
- */
-
-#ifndef BSTRLIB_INCLUDE
-#define BSTRLIB_INCLUDE
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#include <stdarg.h>
-#include <string.h>
-#include <limits.h>
-#include <ctype.h>
-
-#if !defined (BSTRLIB_VSNP_OK) && !defined (BSTRLIB_NOVSNP)
-# if defined (__TURBOC__) && !defined (__BORLANDC__)
-# define BSTRLIB_NOVSNP
-# endif
-#endif
-
-#define BSTR_ERR (-1)
-#define BSTR_OK (0)
-#define BSTR_BS_BUFF_LENGTH_GET (0)
-
-typedef struct tagbstring * bstring;
-typedef const struct tagbstring * const_bstring;
-
-/* Copy functions */
-#define cstr2bstr bfromcstr
-extern bstring bfromcstr (const char * str);
-extern bstring bfromcstralloc (int mlen, const char * str);
-extern bstring blk2bstr (const void * blk, int len);
-extern char * bstr2cstr (const_bstring s, char z);
-extern int bcstrfree (char * s);
-extern bstring bstrcpy (const_bstring b1);
-extern int bassign (bstring a, const_bstring b);
-extern int bassignmidstr (bstring a, const_bstring b, int left, int len);
-extern int bassigncstr (bstring a, const char * str);
-extern int bassignblk (bstring a, const void * s, int len);
-
-/* Destroy function */
-extern int bdestroy (bstring b);
-
-/* Space allocation hinting functions */
-extern int balloc (bstring s, int len);
-extern int ballocmin (bstring b, int len);
-
-/* Substring extraction */
-extern bstring bmidstr (const_bstring b, int left, int len);
-
-/* Various standard manipulations */
-extern int bconcat (bstring b0, const_bstring b1);
-extern int bconchar (bstring b0, char c);
-extern int bcatcstr (bstring b, const char * s);
-extern int bcatblk (bstring b, const void * s, int len);
-extern int binsert (bstring s1, int pos, const_bstring s2, unsigned char fill);
-extern int binsertch (bstring s1, int pos, int len, unsigned char fill);
-extern int breplace (bstring b1, int pos, int len, const_bstring b2, unsigned char fill);
-extern int bdelete (bstring s1, int pos, int len);
-extern int bsetstr (bstring b0, int pos, const_bstring b1, unsigned char fill);
-extern int btrunc (bstring b, int n);
-
-/* Scan/search functions */
-extern int bstricmp (const_bstring b0, const_bstring b1);
-extern int bstrnicmp (const_bstring b0, const_bstring b1, int n);
-extern int biseqcaseless (const_bstring b0, const_bstring b1);
-extern int bisstemeqcaselessblk (const_bstring b0, const void * blk, int len);
-extern int biseq (const_bstring b0, const_bstring b1);
-extern int bisstemeqblk (const_bstring b0, const void * blk, int len);
-extern int biseqcstr (const_bstring b, const char * s);
-extern int biseqcstrcaseless (const_bstring b, const char * s);
-extern int bstrcmp (const_bstring b0, const_bstring b1);
-extern int bstrncmp (const_bstring b0, const_bstring b1, int n);
-extern int binstr (const_bstring s1, int pos, const_bstring s2);
-extern int binstrr (const_bstring s1, int pos, const_bstring s2);
-extern int binstrcaseless (const_bstring s1, int pos, const_bstring s2);
-extern int binstrrcaseless (const_bstring s1, int pos, const_bstring s2);
-extern int bstrchrp (const_bstring b, int c, int pos);
-extern int bstrrchrp (const_bstring b, int c, int pos);
-#define bstrchr(b,c) bstrchrp ((b), (c), 0)
-#define bstrrchr(b,c) bstrrchrp ((b), (c), blength(b)-1)
-extern int binchr (const_bstring b0, int pos, const_bstring b1);
-extern int binchrr (const_bstring b0, int pos, const_bstring b1);
-extern int bninchr (const_bstring b0, int pos, const_bstring b1);
-extern int bninchrr (const_bstring b0, int pos, const_bstring b1);
-extern int bfindreplace (bstring b, const_bstring find, const_bstring repl, int pos);
-extern int bfindreplacecaseless (bstring b, const_bstring find, const_bstring repl, int pos);
-
-/* List of string container functions */
-struct bstrList {
- int qty, mlen;
- bstring * entry;
-};
-extern struct bstrList * bstrListCreate (void);
-extern int bstrListDestroy (struct bstrList * sl);
-extern int bstrListAlloc (struct bstrList * sl, int msz);
-extern int bstrListAllocMin (struct bstrList * sl, int msz);
-
-/* String split and join functions */
-extern struct bstrList * bsplit (const_bstring str, unsigned char splitChar);
-extern struct bstrList * bsplits (const_bstring str, const_bstring splitStr);
-extern struct bstrList * bsplitstr (const_bstring str, const_bstring splitStr);
-extern bstring bjoin (const struct bstrList * bl, const_bstring sep);
-extern int bsplitcb (const_bstring str, unsigned char splitChar, int pos,
- int (* cb) (void * parm, int ofs, int len), void * parm);
-extern int bsplitscb (const_bstring str, const_bstring splitStr, int pos,
- int (* cb) (void * parm, int ofs, int len), void * parm);
-extern int bsplitstrcb (const_bstring str, const_bstring splitStr, int pos,
- int (* cb) (void * parm, int ofs, int len), void * parm);
-
-/* Miscellaneous functions */
-extern int bpattern (bstring b, int len);
-extern int btoupper (bstring b);
-extern int btolower (bstring b);
-extern int bltrimws (bstring b);
-extern int brtrimws (bstring b);
-extern int btrimws (bstring b);
-
-/* <*>printf format functions */
-#if !defined (BSTRLIB_NOVSNP)
-extern bstring bformat (const char * fmt, ...);
-extern int bformata (bstring b, const char * fmt, ...);
-extern int bassignformat (bstring b, const char * fmt, ...);
-extern int bvcformata (bstring b, int count, const char * fmt, va_list arglist);
-
-#define bvformata(ret, b, fmt, lastarg) { \
-bstring bstrtmp_b = (b); \
-const char * bstrtmp_fmt = (fmt); \
-int bstrtmp_r = BSTR_ERR, bstrtmp_sz = 16; \
- for (;;) { \
- va_list bstrtmp_arglist; \
- va_start (bstrtmp_arglist, lastarg); \
- bstrtmp_r = bvcformata (bstrtmp_b, bstrtmp_sz, bstrtmp_fmt, bstrtmp_arglist); \
- va_end (bstrtmp_arglist); \
- if (bstrtmp_r >= 0) { /* Everything went ok */ \
- bstrtmp_r = BSTR_OK; \
- break; \
- } else if (-bstrtmp_r <= bstrtmp_sz) { /* A real error? */ \
- bstrtmp_r = BSTR_ERR; \
- break; \
- } \
- bstrtmp_sz = -bstrtmp_r; /* Doubled or target size */ \
- } \
- ret = bstrtmp_r; \
-}
-
-#endif
-
-typedef int (*bNgetc) (void *parm);
-typedef size_t (* bNread) (void *buff, size_t elsize, size_t nelem, void *parm);
-
-/* Input functions */
-extern bstring bgets (bNgetc getcPtr, void * parm, char terminator);
-extern bstring bread (bNread readPtr, void * parm);
-extern int bgetsa (bstring b, bNgetc getcPtr, void * parm, char terminator);
-extern int bassigngets (bstring b, bNgetc getcPtr, void * parm, char terminator);
-extern int breada (bstring b, bNread readPtr, void * parm);
-
-/* Stream functions */
-extern struct bStream * bsopen (bNread readPtr, void * parm);
-extern void * bsclose (struct bStream * s);
-extern int bsbufflength (struct bStream * s, int sz);
-extern int bsreadln (bstring b, struct bStream * s, char terminator);
-extern int bsreadlns (bstring r, struct bStream * s, const_bstring term);
-extern int bsread (bstring b, struct bStream * s, int n);
-extern int bsreadlna (bstring b, struct bStream * s, char terminator);
-extern int bsreadlnsa (bstring r, struct bStream * s, const_bstring term);
-extern int bsreada (bstring b, struct bStream * s, int n);
-extern int bsunread (struct bStream * s, const_bstring b);
-extern int bspeek (bstring r, const struct bStream * s);
-extern int bssplitscb (struct bStream * s, const_bstring splitStr,
- int (* cb) (void * parm, int ofs, const_bstring entry), void * parm);
-extern int bssplitstrcb (struct bStream * s, const_bstring splitStr,
- int (* cb) (void * parm, int ofs, const_bstring entry), void * parm);
-extern int bseof (const struct bStream * s);
-
-struct tagbstring {
- int mlen;
- int slen;
- unsigned char * data;
-};
-
-/* Accessor macros */
-#define blengthe(b, e) (((b) == (void *)0 || (b)->slen < 0) ? (int)(e) : ((b)->slen))
-#define blength(b) (blengthe ((b), 0))
-#define bdataofse(b, o, e) (((b) == (void *)0 || (b)->data == (void*)0) ? (char *)(e) : ((char *)(b)->data) + (o))
-#define bdataofs(b, o) (bdataofse ((b), (o), (void *)0))
-#define bdatae(b, e) (bdataofse (b, 0, e))
-#define bdata(b) (bdataofs (b, 0))
-#define bchare(b, p, e) ((((unsigned)(p)) < (unsigned)blength(b)) ? ((b)->data[(p)]) : (e))
-#define bchar(b, p) bchare ((b), (p), '\0')
-
-/* Static constant string initialization macro */
-#define bsStaticMlen(q,m) {(m), (int) sizeof(q)-1, (unsigned char *) ("" q "")}
-#if defined(_MSC_VER)
-/* There are many versions of MSVC which emit __LINE__ as a non-constant. */
-# define bsStatic(q) bsStaticMlen(q,-32)
-#endif
-#ifndef bsStatic
-# define bsStatic(q) bsStaticMlen(q,-__LINE__)
-#endif
-
-/* Static constant block parameter pair */
-#define bsStaticBlkParms(q) ((void *)("" q "")), ((int) sizeof(q)-1)
-
-/* Reference building macros */
-#define cstr2tbstr btfromcstr
-#define btfromcstr(t,s) { \
- (t).data = (unsigned char *) (s); \
- (t).slen = ((t).data) ? ((int) (strlen) ((char *)(t).data)) : 0; \
- (t).mlen = -1; \
-}
-#define blk2tbstr(t,s,l) { \
- (t).data = (unsigned char *) (s); \
- (t).slen = l; \
- (t).mlen = -1; \
-}
-#define btfromblk(t,s,l) blk2tbstr(t,s,l)
-#define bmid2tbstr(t,b,p,l) { \
- const_bstring bstrtmp_s = (b); \
- if (bstrtmp_s && bstrtmp_s->data && bstrtmp_s->slen >= 0) { \
- int bstrtmp_left = (p); \
- int bstrtmp_len = (l); \
- if (bstrtmp_left < 0) { \
- bstrtmp_len += bstrtmp_left; \
- bstrtmp_left = 0; \
- } \
- if (bstrtmp_len > bstrtmp_s->slen - bstrtmp_left) \
- bstrtmp_len = bstrtmp_s->slen - bstrtmp_left; \
- if (bstrtmp_len <= 0) { \
- (t).data = (unsigned char *)""; \
- (t).slen = 0; \
- } else { \
- (t).data = bstrtmp_s->data + bstrtmp_left; \
- (t).slen = bstrtmp_len; \
- } \
- } else { \
- (t).data = (unsigned char *)""; \
- (t).slen = 0; \
- } \
- (t).mlen = -__LINE__; \
-}
-#define btfromblkltrimws(t,s,l) { \
- int bstrtmp_idx = 0, bstrtmp_len = (l); \
- unsigned char * bstrtmp_s = (s); \
- if (bstrtmp_s && bstrtmp_len >= 0) { \
- for (; bstrtmp_idx < bstrtmp_len; bstrtmp_idx++) { \
- if (!isspace (bstrtmp_s[bstrtmp_idx])) break; \
- } \
- } \
- (t).data = bstrtmp_s + bstrtmp_idx; \
- (t).slen = bstrtmp_len - bstrtmp_idx; \
- (t).mlen = -__LINE__; \
-}
-#define btfromblkrtrimws(t,s,l) { \
- int bstrtmp_len = (l) - 1; \
- unsigned char * bstrtmp_s = (s); \
- if (bstrtmp_s && bstrtmp_len >= 0) { \
- for (; bstrtmp_len >= 0; bstrtmp_len--) { \
- if (!isspace (bstrtmp_s[bstrtmp_len])) break; \
- } \
- } \
- (t).data = bstrtmp_s; \
- (t).slen = bstrtmp_len + 1; \
- (t).mlen = -__LINE__; \
-}
-#define btfromblktrimws(t,s,l) { \
- int bstrtmp_idx = 0, bstrtmp_len = (l) - 1; \
- unsigned char * bstrtmp_s = (s); \
- if (bstrtmp_s && bstrtmp_len >= 0) { \
- for (; bstrtmp_idx <= bstrtmp_len; bstrtmp_idx++) { \
- if (!isspace (bstrtmp_s[bstrtmp_idx])) break; \
- } \
- for (; bstrtmp_len >= bstrtmp_idx; bstrtmp_len--) { \
- if (!isspace (bstrtmp_s[bstrtmp_len])) break; \
- } \
- } \
- (t).data = bstrtmp_s + bstrtmp_idx; \
- (t).slen = bstrtmp_len + 1 - bstrtmp_idx; \
- (t).mlen = -__LINE__; \
-}
-
-/* Write protection macros */
-#define bwriteprotect(t) { if ((t).mlen >= 0) (t).mlen = -1; }
-#define bwriteallow(t) { if ((t).mlen == -1) (t).mlen = (t).slen + ((t).slen == 0); }
-#define biswriteprotected(t) ((t).mlen <= 0)
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif
diff --git a/src/buffer.c b/src/buffer.c
new file mode 100644
index 0000000..b81e7fa
--- /dev/null
+++ b/src/buffer.c
@@ -0,0 +1,313 @@
+#include <stdarg.h>
+#include <ctype.h>
+#include <string.h>
+#include <assert.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/param.h>
+
+#include "buffer.h"
+
+/* Used as default value for gh_buf->ptr so that people can always
+ * assume ptr is non-NULL and zero terminated even for new gh_bufs.
+ */
+unsigned char gh_buf__initbuf[1];
+/* Sentinel storage: gh_buf_try_grow points gh_buf->ptr here when an
+ * allocation fails with mark_oom set, and gh_buf_oom() tests for it.
+ */
+unsigned char gh_buf__oom[1];
+
+#define ENSURE_SIZE(b, d) \
+ if ((d) > buf->asize && gh_buf_grow(b, (d)) < 0)\
+ return -1;
+
+/* Put `buf` into the empty state (placeholder storage, zero length and
+ * zero capacity) and, when initial_size is non-zero, pre-allocate room
+ * for that many bytes.
+ */
+void gh_buf_init(gh_buf *buf, int initial_size)
+{
+    buf->ptr = gh_buf__initbuf;
+    buf->size = 0;
+    buf->asize = 0;
+
+    if (initial_size != 0)
+        gh_buf_grow(buf, initial_size);
+}
+
+/* Grow `buf` so that it can hold at least `target_size` bytes.
+ *
+ * Returns 0 when the buffer is already large enough or was enlarged,
+ * and -1 when the buffer is marked out-of-memory, is a static buffer
+ * (asize < 0), or the allocation fails. When the allocation fails and
+ * `mark_oom` is true, the old storage is released and the buffer is
+ * pointed at the gh_buf__oom sentinel; when `mark_oom` is false the
+ * buffer is left untouched.
+ */
+int gh_buf_try_grow(gh_buf *buf, int target_size, bool mark_oom)
+{
+    unsigned char *old_ptr;
+    unsigned char *new_ptr;
+    size_t new_size;
+
+    if (buf->ptr == gh_buf__oom || buf->asize < 0)
+        return -1;
+
+    if (target_size <= buf->asize)
+        return 0;
+
+    /* target_size > asize >= 0 from here on, so the casts below are safe. */
+    if (buf->asize == 0) {
+        /* Nothing on the heap yet (ptr is the shared placeholder). */
+        new_size = (size_t)target_size;
+        old_ptr = NULL;
+    } else {
+        new_size = (size_t)buf->asize;
+        old_ptr = buf->ptr;
+    }
+
+    /* grow the buffer size by 1.5, until it's big enough
+     * to fit our target size */
+    while (new_size < (size_t)target_size)
+        new_size = (new_size << 1) - (new_size >> 1);
+
+    /* round allocation up to multiple of 8 */
+    new_size = (new_size + 7) & ~(size_t)7;
+
+    new_ptr = realloc(old_ptr, new_size);
+    if (!new_ptr) {
+        if (mark_oom) {
+            /* realloc left the old block alive; release it before the
+             * only pointer to it is overwritten by the sentinel. */
+            free(old_ptr);
+            buf->ptr = gh_buf__oom;
+        }
+        return -1;
+    }
+
+    buf->asize = (int)new_size;
+    buf->ptr = new_ptr;
+
+    /* truncate the existing buffer size if necessary */
+    if (buf->size >= buf->asize)
+        buf->size = buf->asize - 1;
+    buf->ptr[buf->size] = '\0';
+
+    return 0;
+}
+
+/* Release any heap storage owned by `buf` and reset it to the empty
+ * state. A NULL `buf` is ignored. The placeholder and OOM sentinel are
+ * never passed to free().
+ */
+void gh_buf_free(gh_buf *buf)
+{
+    int owns_heap;
+
+    if (buf == NULL)
+        return;
+
+    owns_heap = buf->asize > 0 &&
+                buf->ptr != gh_buf__initbuf &&
+                buf->ptr != gh_buf__oom;
+    if (owns_heap)
+        free(buf->ptr);
+
+    gh_buf_init(buf, 0);
+}
+
+/* Empty the buffer without releasing its allocation. A buffer with a
+ * negative asize is detached from its storage and pointed at the
+ * shared placeholder instead.
+ */
+void gh_buf_clear(gh_buf *buf)
+{
+    buf->size = 0;
+
+    if (buf->asize > 0) {
+        buf->ptr[0] = '\0';
+    } else if (buf->asize < 0) {
+        buf->ptr = gh_buf__initbuf;
+        buf->asize = 0;
+    }
+}
+
+/* Replace the contents of `buf` with the first `len` bytes of `data`.
+ *
+ * A NULL `data` or a zero `len` clears the buffer. When `data` is the
+ * buffer's own storage pointer, no copy is made and the content is
+ * simply cut to `len` bytes. Returns 0 on success, -1 if the buffer
+ * could not be grown.
+ */
+int gh_buf_set(gh_buf *buf, const char *data, int len)
+{
+    if (len == 0 || data == NULL) {
+        gh_buf_clear(buf);
+        return 0;
+    }
+
+    /* Cast so both operands of the comparison have the same pointer type
+     * (buf->ptr is unsigned char *). */
+    if ((const unsigned char *)data != buf->ptr) {
+        ENSURE_SIZE(buf, len + 1);
+        memmove(buf->ptr, data, len);
+    }
+
+    buf->size = len;
+    buf->ptr[buf->size] = '\0';
+    return 0;
+}
+
+/* Replace the contents of `buf` with the NUL-terminated `string`;
+ * a NULL string clears the buffer.
+ */
+int gh_buf_sets(gh_buf *buf, const char *string)
+{
+    if (string == NULL)
+        return gh_buf_set(buf, string, 0);
+
+    return gh_buf_set(buf, string, strlen(string));
+}
+
+/* Append the single byte `c`, keeping the buffer NUL-terminated.
+ * Returns 0 on success, -1 if the buffer could not be grown.
+ */
+int gh_buf_putc(gh_buf *buf, char c)
+{
+    /* one byte for c, one for the terminator */
+    ENSURE_SIZE(buf, buf->size + 2);
+
+    buf->ptr[buf->size] = c;
+    buf->size += 1;
+    buf->ptr[buf->size] = '\0';
+    return 0;
+}
+
+/* Append `len` bytes from `data`, keeping the buffer NUL-terminated.
+ * Returns 0 on success, -1 if the buffer could not be grown.
+ */
+int gh_buf_put(gh_buf *buf, const char *data, int len)
+{
+    ENSURE_SIZE(buf, buf->size + len + 1);
+
+    unsigned char *tail = buf->ptr + buf->size;
+
+    memmove(tail, data, len);
+    tail[len] = '\0';
+    buf->size += len;
+    return 0;
+}
+
+/* Append the NUL-terminated `string` (must not be NULL). */
+int gh_buf_puts(gh_buf *buf, const char *string)
+{
+    assert(string);
+
+    const size_t length = strlen(string);
+    return gh_buf_put(buf, string, length);
+}
+
+/* Append printf-style formatted output to `buf`.
+ *
+ * The buffer is first sized with a guess (twice the format length) and
+ * then formatted in a loop: when vsnprintf reports that more room is
+ * needed the buffer is grown to the exact size and the call is retried.
+ * `ap` itself is never consumed; every attempt works on a copy.
+ *
+ * Returns 0 on success. Returns -1 if the buffer could not be grown, or
+ * if vsnprintf reports an error, in which case the buffer is marked
+ * out-of-memory.
+ */
+int gh_buf_vprintf(gh_buf *buf, const char *format, va_list ap)
+{
+    const int expected_size = buf->size + (strlen(format) * 2);
+    int len;
+
+    ENSURE_SIZE(buf, expected_size);
+
+    for (;;) {
+        va_list args;
+
+        va_copy(args, ap);
+        len = vsnprintf(
+            (char *)buf->ptr + buf->size,
+            buf->asize - buf->size,
+            format, args
+        );
+        /* every va_copy must be paired with a va_end */
+        va_end(args);
+
+        if (len < 0) {
+            /* Only heap storage may be freed; with an empty format and
+             * an empty buffer, ptr can still be the static placeholder. */
+            if (buf->asize > 0 && buf->ptr != gh_buf__initbuf)
+                free(buf->ptr);
+            buf->ptr = gh_buf__oom;
+            return -1;
+        }
+
+        /* Output plus terminator fit: commit the new length. */
+        if (len + 1 <= buf->asize - buf->size) {
+            buf->size += len;
+            break;
+        }
+
+        ENSURE_SIZE(buf, buf->size + len + 1);
+    }
+
+    return 0;
+}
+
+/* Variadic front end for gh_buf_vprintf: append formatted output to
+ * `buf` and hand back gh_buf_vprintf's result.
+ */
+int gh_buf_printf(gh_buf *buf, const char *format, ...)
+{
+    va_list ap;
+    int status;
+
+    va_start(ap, format);
+    status = gh_buf_vprintf(buf, format, ap);
+    va_end(ap);
+
+    return status;
+}
+
+/* Copy the buffer contents into the caller-supplied array `data` of
+ * `datasize` bytes, truncating if necessary and always NUL-terminating.
+ *
+ * `datasize` is an int to agree with the prototype in buffer.h (the
+ * definition previously used size_t, giving conflicting declarations).
+ * Buffers that are empty or have no allocation of their own
+ * (asize <= 0) produce an empty string.
+ */
+void gh_buf_copy_cstr(char *data, int datasize, const gh_buf *buf)
+{
+    int copylen;
+
+    assert(data && datasize > 0 && buf);
+
+    data[0] = '\0';
+
+    if (buf->size == 0 || buf->asize <= 0)
+        return;
+
+    /* leave room for the terminator */
+    copylen = buf->size;
+    if (copylen > datasize - 1)
+        copylen = datasize - 1;
+
+    memmove(data, buf->ptr, copylen);
+    data[copylen] = '\0';
+}
+
+/* Exchange the complete state (pointer, capacity, length) of two buffers. */
+void gh_buf_swap(gh_buf *buf_a, gh_buf *buf_b)
+{
+    const gh_buf saved_a = *buf_a;
+
+    *buf_a = *buf_b;
+    *buf_b = saved_a;
+}
+
+/* Hand the buffer's storage over to the caller and reset `buf` to the
+ * empty state. Returns NULL when the buffer has no allocation
+ * (asize == 0) or is marked out-of-memory; in that case `buf` is left
+ * unchanged.
+ */
+char *gh_buf_detach(gh_buf *buf)
+{
+    unsigned char *data = buf->ptr;
+
+    if (buf->asize == 0 || data == gh_buf__oom)
+        return NULL;
+
+    gh_buf_init(buf, 0);
+
+    /* storage is unsigned char; the public interface speaks char */
+    return (char *)data;
+}
+
+/* Make `buf` use the NUL-terminated string `ptr` as its storage, after
+ * freeing whatever it held before.
+ *
+ * `asize` is the capacity the caller claims for `ptr`; 0, or a value
+ * smaller than the string length, falls back to strlen(ptr) + 1.
+ * With a NULL `ptr` the buffer is instead grown to `asize` bytes.
+ */
+void gh_buf_attach(gh_buf *buf, char *ptr, int asize)
+{
+    gh_buf_free(buf);
+
+    if (ptr) {
+        /* explicit conversions: storage is unsigned char, sizes are int */
+        buf->ptr = (unsigned char *)ptr;
+        buf->size = (int)strlen(ptr);
+        if (asize)
+            buf->asize = (asize < buf->size) ? buf->size + 1 : asize;
+        else /* pass 0 to fall back on strlen + 1 */
+            buf->asize = buf->size + 1;
+    } else {
+        gh_buf_grow(buf, asize);
+    }
+}
+
+/* Order two buffers: compare the bytes they have in common with memcmp,
+ * and when those are equal the shorter buffer sorts first.
+ * Returns the memcmp result, or -1 / 0 / 1 on a length-only decision.
+ */
+int gh_buf_cmp(const gh_buf *a, const gh_buf *b)
+{
+    const int common = (a->size < b->size) ? a->size : b->size;
+    const int order = memcmp(a->ptr, b->ptr, common);
+
+    if (order != 0)
+        return order;
+    if (a->size < b->size)
+        return -1;
+    if (a->size > b->size)
+        return 1;
+    return 0;
+}
+
+/* Find the first byte equal to `c` at or after offset `pos`.
+ *
+ * Returns the offset of the match measured from the start of the
+ * buffer, or -1 when there is no match or `pos` lies outside the
+ * buffer contents.
+ */
+int gh_buf_strchr(const gh_buf *buf, int c, int pos)
+{
+    const unsigned char *p;
+
+    /* Reject out-of-range positions so the length handed to memchr
+     * can never go negative (and wrap to a huge size_t). */
+    if (pos < 0 || pos >= buf->size)
+        return -1;
+
+    p = memchr(buf->ptr + pos, c, buf->size - pos);
+    if (!p)
+        return -1;
+
+    /* offset relative to the buffer start (was `p - p->ptr`, which
+     * does not compile) */
+    return (int)(p - buf->ptr);
+}
+
+/* Scan backwards from offset `pos` down to 0 for a byte equal to `c`.
+ * Returns the offset of the first match found, or -1 if there is none.
+ * `pos` is not checked against the buffer length.
+ */
+int gh_buf_strrchr(const gh_buf *buf, int c, int pos)
+{
+    const unsigned char wanted = (unsigned char) c;
+    int idx = pos;
+
+    while (idx >= 0) {
+        if (buf->ptr[idx] == wanted)
+            return idx;
+        idx--;
+    }
+
+    return -1;
+}
+
+/* Shorten the buffer to `len` bytes. Does nothing when `len` is
+ * negative or not smaller than the current length.
+ *
+ * `len` is an int to agree with the prototype in buffer.h (the
+ * definition previously used size_t, giving conflicting declarations).
+ */
+void gh_buf_truncate(gh_buf *buf, int len)
+{
+    assert(buf->asize >= 0);
+
+    if (len >= 0 && len < buf->size) {
+        buf->size = len;
+        buf->ptr[buf->size] = '\0';
+    }
+}
+
+/* Drop the first `len` bytes of the buffer, shifting the remainder to
+ * the front. Does nothing when `len` is not positive or not smaller
+ * than the current length.
+ *
+ * `len` is an int to agree with the prototype in buffer.h (the
+ * definition previously used size_t, giving conflicting declarations).
+ */
+void gh_buf_ltruncate(gh_buf *buf, int len)
+{
+    assert(buf->asize >= 0);
+
+    if (len > 0 && len < buf->size) {
+        memmove(buf->ptr, buf->ptr + len, buf->size - len);
+        buf->size -= len;
+        buf->ptr[buf->size] = '\0';
+    }
+}
+
+/* Strip leading and trailing whitespace (as classified by isspace)
+ * from the buffer in place.
+ */
+void gh_buf_trim(gh_buf *buf)
+{
+    int lead = 0;
+
+    assert(buf->asize >= 0);
+
+    /* ltrim: count the leading whitespace, then drop it from the front.
+     * This must be gh_buf_ltruncate; gh_buf_truncate would instead cut
+     * the string down to just its leading whitespace and so empty every
+     * buffer that starts with a non-space byte. If the buffer is all
+     * whitespace the call is a no-op and the rtrim loop empties it. */
+    while (lead < buf->size && isspace(buf->ptr[lead]))
+        lead++;
+
+    gh_buf_ltruncate(buf, lead);
+
+    /* rtrim */
+    while (buf->size > 0 && isspace(buf->ptr[buf->size - 1]))
+        buf->size--;
+
+    buf->ptr[buf->size] = '\0';
+}
diff --git a/src/buffer.h b/src/buffer.h
new file mode 100644
index 0000000..2581ee3
--- /dev/null
+++ b/src/buffer.h
@@ -0,0 +1,119 @@
+#ifndef INCLUDE_buffer_h__
+#define INCLUDE_buffer_h__
+
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <string.h>
+#include <sys/types.h>
+
+/* Growable byte buffer.
+ *
+ * ptr   - storage; gh_buf__initbuf while empty (see GH_BUF_INIT) and
+ *         gh_buf__oom after a failed allocation (see gh_buf_oom).
+ * asize - allocated capacity in bytes; 0 when nothing is allocated and
+ *         -1 for buffers set up by gh_buf_static.
+ * size  - number of content bytes, not counting the terminating NUL.
+ */
+typedef struct {
+ unsigned char *ptr;
+ int asize, size;
+} gh_buf;
+
+extern unsigned char gh_buf__initbuf[];
+extern unsigned char gh_buf__oom[];
+
+/* Static initializer for an empty gh_buf: ptr = gh_buf__initbuf,
+ * asize = 0, size = 0. */
+#define GH_BUF_INIT { gh_buf__initbuf, 0, 0 }
+
+/**
+ * Initialize a gh_buf structure.
+ *
+ * For the cases where GH_BUF_INIT cannot be used to do static
+ * initialization.
+ */
+extern void gh_buf_init(gh_buf *buf, int initial_size);
+
+/**
+ * Point `buf` at the NUL-terminated string `source` without copying.
+ *
+ * The capacity is recorded as -1, which gh_buf_try_grow treats as
+ * "cannot be grown".
+ */
+static inline void gh_buf_static(gh_buf *buf, unsigned char *source)
+{
+    buf->ptr = source;
+    /* strlen takes const char * and returns size_t: convert explicitly */
+    buf->size = (int)strlen((const char *)source);
+    buf->asize = -1;
+}
+
+/**
+ * Attempt to grow the buffer to hold at least `target_size` bytes.
+ *
+ * If the allocation fails, this will return an error. If mark_oom is true,
+ * this will mark the buffer as invalid for future operations; if false,
+ * existing buffer content will be preserved, but calling code must handle
+ * that buffer was not expanded.
+ */
+extern int gh_buf_try_grow(gh_buf *buf, int target_size, bool mark_oom);
+
+/**
+ * Grow the buffer to hold at least `target_size` bytes.
+ *
+ * If the allocation fails, this will return an error and the buffer will be
+ * marked as invalid for future operations, invalidating its contents.
+ *
+ * @return 0 on success or -1 on failure
+ */
+static inline int gh_buf_grow(gh_buf *buf, int target_size)
+{
+    /* Same as gh_buf_try_grow, always flagging the buffer on failure. */
+    const bool mark_oom = true;
+
+    return gh_buf_try_grow(buf, target_size, mark_oom);
+}
+
+extern void gh_buf_free(gh_buf *buf);
+extern void gh_buf_swap(gh_buf *buf_a, gh_buf *buf_b);
+
+/**
+ * Test if there have been any reallocation failures with this gh_buf.
+ *
+ * Any function that writes to a gh_buf can fail due to memory allocation
+ * issues. If one fails, the gh_buf will be marked with an OOM error and
+ * further calls to modify the buffer will fail. Check gh_buf_oom() at the
+ * end of your sequence and it will be true if you ran out of memory at any
+ * point with that buffer.
+ *
+ * @return false if no error, true if allocation error
+ */
+static inline bool gh_buf_oom(const gh_buf *buf)
+{
+    /* A failed buffer is recognised by its pointer being the sentinel. */
+    const bool failed = (buf->ptr == gh_buf__oom);
+
+    return failed;
+}
+
+
+/** Number of content bytes currently held by `buf`. */
+static inline size_t gh_buf_len(const gh_buf *buf)
+{
+    const size_t length = buf->size;
+
+    return length;
+}
+
+extern int gh_buf_cmp(const gh_buf *a, const gh_buf *b);
+
+extern void gh_buf_attach(gh_buf *buf, char *ptr, int asize);
+extern char *gh_buf_detach(gh_buf *buf);
+extern void gh_buf_copy_cstr(char *data, int datasize, const gh_buf *buf);
+
+/**
+ * View the buffer's storage as a C string. The pointer refers to the
+ * buffer's own storage; nothing is copied.
+ */
+static inline const char *gh_buf_cstr(const gh_buf *buf)
+{
+    /* storage is unsigned char *; convert explicitly for the char * API */
+    return (const char *)buf->ptr;
+}
+
+/* Byte `n` of the buffer's storage, as an lvalue; performs no bounds check. */
+#define gh_buf_at(buf, n) ((buf)->ptr[n])
+
+/*
+ * Functions below that return int value error codes will return 0 on
+ * success or -1 on failure (which generally means an allocation failed).
+ * Using a gh_buf where the allocation has failed will result in -1 from
+ * all further calls using that buffer. As a result, you can ignore the
+ * return code of these functions and call them in a series then just call
+ * gh_buf_oom at the end.
+ */
+extern int gh_buf_set(gh_buf *buf, const char *data, int len);
+extern int gh_buf_sets(gh_buf *buf, const char *string);
+extern int gh_buf_putc(gh_buf *buf, char c);
+extern int gh_buf_put(gh_buf *buf, const char *data, int len);
+extern int gh_buf_puts(gh_buf *buf, const char *string);
+extern int gh_buf_printf(gh_buf *buf, const char *format, ...)
+ __attribute__((format (printf, 2, 3)));
+extern int gh_buf_vprintf(gh_buf *buf, const char *format, va_list ap);
+extern void gh_buf_clear(gh_buf *buf);
+
+int gh_buf_strchr(const gh_buf *buf, int c, int pos);
+int gh_buf_strrchr(const gh_buf *buf, int c, int pos);
+void gh_buf_truncate(gh_buf *buf, int len);
+void gh_buf_ltruncate(gh_buf *buf, int len);
+void gh_buf_trim(gh_buf *buf);
+
+#endif
diff --git a/src/case_fold_switch.c b/src/case_fold_switch.inc
index 70fdd75..70fdd75 100644
--- a/src/case_fold_switch.c
+++ b/src/case_fold_switch.inc
diff --git a/src/casefold.c b/src/casefold.c
deleted file mode 100644
index 33f18aa..0000000
--- a/src/casefold.c
+++ /dev/null
@@ -1,2699 +0,0 @@
-#include <stdlib.h>
-#include <stdio.h>
-
-
- switch c {
- case 0x0041:
- bufpush(0x0061);
- break;
- case 0x0042:
- bufpush(0x0062);
- break;
- case 0x0043:
- bufpush(0x0063);
- break;
- case 0x0044:
- bufpush(0x0064);
- break;
- case 0x0045:
- bufpush(0x0065);
- break;
- case 0x0046:
- bufpush(0x0066);
- break;
- case 0x0047:
- bufpush(0x0067);
- break;
- case 0x0048:
- bufpush(0x0068);
- break;
- case 0x0049:
- bufpush(0x0069);
- break;
- case 0x0049:
- bufpush(0x0131);
- break;
- case 0x004A:
- bufpush(0x006A);
- break;
- case 0x004B:
- bufpush(0x006B);
- break;
- case 0x004C:
- bufpush(0x006C);
- break;
- case 0x004D:
- bufpush(0x006D);
- break;
- case 0x004E:
- bufpush(0x006E);
- break;
- case 0x004F:
- bufpush(0x006F);
- break;
- case 0x0050:
- bufpush(0x0070);
- break;
- case 0x0051:
- bufpush(0x0071);
- break;
- case 0x0052:
- bufpush(0x0072);
- break;
- case 0x0053:
- bufpush(0x0073);
- break;
- case 0x0054:
- bufpush(0x0074);
- break;
- case 0x0055:
- bufpush(0x0075);
- break;
- case 0x0056:
- bufpush(0x0076);
- break;
- case 0x0057:
- bufpush(0x0077);
- break;
- case 0x0058:
- bufpush(0x0078);
- break;
- case 0x0059:
- bufpush(0x0079);
- break;
- case 0x005A:
- bufpush(0x007A);
- break;
- case 0x00B5:
- bufpush(0x03BC);
- break;
- case 0x00C0:
- bufpush(0x00E0);
- break;
- case 0x00C1:
- bufpush(0x00E1);
- break;
- case 0x00C2:
- bufpush(0x00E2);
- break;
- case 0x00C3:
- bufpush(0x00E3);
- break;
- case 0x00C4:
- bufpush(0x00E4);
- break;
- case 0x00C5:
- bufpush(0x00E5);
- break;
- case 0x00C6:
- bufpush(0x00E6);
- break;
- case 0x00C7:
- bufpush(0x00E7);
- break;
- case 0x00C8:
- bufpush(0x00E8);
- break;
- case 0x00C9:
- bufpush(0x00E9);
- break;
- case 0x00CA:
- bufpush(0x00EA);
- break;
- case 0x00CB:
- bufpush(0x00EB);
- break;
- case 0x00CC:
- bufpush(0x00EC);
- break;
- case 0x00CD:
- bufpush(0x00ED);
- break;
- case 0x00CE:
- bufpush(0x00EE);
- break;
- case 0x00CF:
- bufpush(0x00EF);
- break;
- case 0x00D0:
- bufpush(0x00F0);
- break;
- case 0x00D1:
- bufpush(0x00F1);
- break;
- case 0x00D2:
- bufpush(0x00F2);
- break;
- case 0x00D3:
- bufpush(0x00F3);
- break;
- case 0x00D4:
- bufpush(0x00F4);
- break;
- case 0x00D5:
- bufpush(0x00F5);
- break;
- case 0x00D6:
- bufpush(0x00F6);
- break;
- case 0x00D8:
- bufpush(0x00F8);
- break;
- case 0x00D9:
- bufpush(0x00F9);
- break;
- case 0x00DA:
- bufpush(0x00FA);
- break;
- case 0x00DB:
- bufpush(0x00FB);
- break;
- case 0x00DC:
- bufpush(0x00FC);
- break;
- case 0x00DD:
- bufpush(0x00FD);
- break;
- case 0x00DE:
- bufpush(0x00FE);
- break;
- case 0x00DF:
- bufpush(0x0073);
- bufpush(0x0073);
- break;
- case 0x0100:
- bufpush(0x0101);
- break;
- case 0x0102:
- bufpush(0x0103);
- break;
- case 0x0104:
- bufpush(0x0105);
- break;
- case 0x0106:
- bufpush(0x0107);
- break;
- case 0x0108:
- bufpush(0x0109);
- break;
- case 0x010A:
- bufpush(0x010B);
- break;
- case 0x010C:
- bufpush(0x010D);
- break;
- case 0x010E:
- bufpush(0x010F);
- break;
- case 0x0110:
- bufpush(0x0111);
- break;
- case 0x0112:
- bufpush(0x0113);
- break;
- case 0x0114:
- bufpush(0x0115);
- break;
- case 0x0116:
- bufpush(0x0117);
- break;
- case 0x0118:
- bufpush(0x0119);
- break;
- case 0x011A:
- bufpush(0x011B);
- break;
- case 0x011C:
- bufpush(0x011D);
- break;
- case 0x011E:
- bufpush(0x011F);
- break;
- case 0x0120:
- bufpush(0x0121);
- break;
- case 0x0122:
- bufpush(0x0123);
- break;
- case 0x0124:
- bufpush(0x0125);
- break;
- case 0x0126:
- bufpush(0x0127);
- break;
- case 0x0128:
- bufpush(0x0129);
- break;
- case 0x012A:
- bufpush(0x012B);
- break;
- case 0x012C:
- bufpush(0x012D);
- break;
- case 0x012E:
- bufpush(0x012F);
- break;
- case 0x0130:
- bufpush(0x0069);
- bufpush(0x0307);
- break;
- case 0x0130:
- bufpush(0x0069);
- break;
- case 0x0132:
- bufpush(0x0133);
- break;
- case 0x0134:
- bufpush(0x0135);
- break;
- case 0x0136:
- bufpush(0x0137);
- break;
- case 0x0139:
- bufpush(0x013A);
- break;
- case 0x013B:
- bufpush(0x013C);
- break;
- case 0x013D:
- bufpush(0x013E);
- break;
- case 0x013F:
- bufpush(0x0140);
- break;
- case 0x0141:
- bufpush(0x0142);
- break;
- case 0x0143:
- bufpush(0x0144);
- break;
- case 0x0145:
- bufpush(0x0146);
- break;
- case 0x0147:
- bufpush(0x0148);
- break;
- case 0x0149:
- bufpush(0x02BC);
- bufpush(0x006E);
- break;
- case 0x014A:
- bufpush(0x014B);
- break;
- case 0x014C:
- bufpush(0x014D);
- break;
- case 0x014E:
- bufpush(0x014F);
- break;
- case 0x0150:
- bufpush(0x0151);
- break;
- case 0x0152:
- bufpush(0x0153);
- break;
- case 0x0154:
- bufpush(0x0155);
- break;
- case 0x0156:
- bufpush(0x0157);
- break;
- case 0x0158:
- bufpush(0x0159);
- break;
- case 0x015A:
- bufpush(0x015B);
- break;
- case 0x015C:
- bufpush(0x015D);
- break;
- case 0x015E:
- bufpush(0x015F);
- break;
- case 0x0160:
- bufpush(0x0161);
- break;
- case 0x0162:
- bufpush(0x0163);
- break;
- case 0x0164:
- bufpush(0x0165);
- break;
- case 0x0166:
- bufpush(0x0167);
- break;
- case 0x0168:
- bufpush(0x0169);
- break;
- case 0x016A:
- bufpush(0x016B);
- break;
- case 0x016C:
- bufpush(0x016D);
- break;
- case 0x016E:
- bufpush(0x016F);
- break;
- case 0x0170:
- bufpush(0x0171);
- break;
- case 0x0172:
- bufpush(0x0173);
- break;
- case 0x0174:
- bufpush(0x0175);
- break;
- case 0x0176:
- bufpush(0x0177);
- break;
- case 0x0178:
- bufpush(0x00FF);
- break;
- case 0x0179:
- bufpush(0x017A);
- break;
- case 0x017B:
- bufpush(0x017C);
- break;
- case 0x017D:
- bufpush(0x017E);
- break;
- case 0x017F:
- bufpush(0x0073);
- break;
- case 0x0181:
- bufpush(0x0253);
- break;
- case 0x0182:
- bufpush(0x0183);
- break;
- case 0x0184:
- bufpush(0x0185);
- break;
- case 0x0186:
- bufpush(0x0254);
- break;
- case 0x0187:
- bufpush(0x0188);
- break;
- case 0x0189:
- bufpush(0x0256);
- break;
- case 0x018A:
- bufpush(0x0257);
- break;
- case 0x018B:
- bufpush(0x018C);
- break;
- case 0x018E:
- bufpush(0x01DD);
- break;
- case 0x018F:
- bufpush(0x0259);
- break;
- case 0x0190:
- bufpush(0x025B);
- break;
- case 0x0191:
- bufpush(0x0192);
- break;
- case 0x0193:
- bufpush(0x0260);
- break;
- case 0x0194:
- bufpush(0x0263);
- break;
- case 0x0196:
- bufpush(0x0269);
- break;
- case 0x0197:
- bufpush(0x0268);
- break;
- case 0x0198:
- bufpush(0x0199);
- break;
- case 0x019C:
- bufpush(0x026F);
- break;
- case 0x019D:
- bufpush(0x0272);
- break;
- case 0x019F:
- bufpush(0x0275);
- break;
- case 0x01A0:
- bufpush(0x01A1);
- break;
- case 0x01A2:
- bufpush(0x01A3);
- break;
- case 0x01A4:
- bufpush(0x01A5);
- break;
- case 0x01A6:
- bufpush(0x0280);
- break;
- case 0x01A7:
- bufpush(0x01A8);
- break;
- case 0x01A9:
- bufpush(0x0283);
- break;
- case 0x01AC:
- bufpush(0x01AD);
- break;
- case 0x01AE:
- bufpush(0x0288);
- break;
- case 0x01AF:
- bufpush(0x01B0);
- break;
- case 0x01B1:
- bufpush(0x028A);
- break;
- case 0x01B2:
- bufpush(0x028B);
- break;
- case 0x01B3:
- bufpush(0x01B4);
- break;
- case 0x01B5:
- bufpush(0x01B6);
- break;
- case 0x01B7:
- bufpush(0x0292);
- break;
- case 0x01B8:
- bufpush(0x01B9);
- break;
- case 0x01BC:
- bufpush(0x01BD);
- break;
- case 0x01C4:
- bufpush(0x01C6);
- break;
- case 0x01C5:
- bufpush(0x01C6);
- break;
- case 0x01C7:
- bufpush(0x01C9);
- break;
- case 0x01C8:
- bufpush(0x01C9);
- break;
- case 0x01CA:
- bufpush(0x01CC);
- break;
- case 0x01CB:
- bufpush(0x01CC);
- break;
- case 0x01CD:
- bufpush(0x01CE);
- break;
- case 0x01CF:
- bufpush(0x01D0);
- break;
- case 0x01D1:
- bufpush(0x01D2);
- break;
- case 0x01D3:
- bufpush(0x01D4);
- break;
- case 0x01D5:
- bufpush(0x01D6);
- break;
- case 0x01D7:
- bufpush(0x01D8);
- break;
- case 0x01D9:
- bufpush(0x01DA);
- break;
- case 0x01DB:
- bufpush(0x01DC);
- break;
- case 0x01DE:
- bufpush(0x01DF);
- break;
- case 0x01E0:
- bufpush(0x01E1);
- break;
- case 0x01E2:
- bufpush(0x01E3);
- break;
- case 0x01E4:
- bufpush(0x01E5);
- break;
- case 0x01E6:
- bufpush(0x01E7);
- break;
- case 0x01E8:
- bufpush(0x01E9);
- break;
- case 0x01EA:
- bufpush(0x01EB);
- break;
- case 0x01EC:
- bufpush(0x01ED);
- break;
- case 0x01EE:
- bufpush(0x01EF);
- break;
- case 0x01F0:
- bufpush(0x006A);
- bufpush(0x030C);
- break;
- case 0x01F1:
- bufpush(0x01F3);
- break;
- case 0x01F2:
- bufpush(0x01F3);
- break;
- case 0x01F4:
- bufpush(0x01F5);
- break;
- case 0x01F6:
- bufpush(0x0195);
- break;
- case 0x01F7:
- bufpush(0x01BF);
- break;
- case 0x01F8:
- bufpush(0x01F9);
- break;
- case 0x01FA:
- bufpush(0x01FB);
- break;
- case 0x01FC:
- bufpush(0x01FD);
- break;
- case 0x01FE:
- bufpush(0x01FF);
- break;
- case 0x0200:
- bufpush(0x0201);
- break;
- case 0x0202:
- bufpush(0x0203);
- break;
- case 0x0204:
- bufpush(0x0205);
- break;
- case 0x0206:
- bufpush(0x0207);
- break;
- case 0x0208:
- bufpush(0x0209);
- break;
- case 0x020A:
- bufpush(0x020B);
- break;
- case 0x020C:
- bufpush(0x020D);
- break;
- case 0x020E:
- bufpush(0x020F);
- break;
- case 0x0210:
- bufpush(0x0211);
- break;
- case 0x0212:
- bufpush(0x0213);
- break;
- case 0x0214:
- bufpush(0x0215);
- break;
- case 0x0216:
- bufpush(0x0217);
- break;
- case 0x0218:
- bufpush(0x0219);
- break;
- case 0x021A:
- bufpush(0x021B);
- break;
- case 0x021C:
- bufpush(0x021D);
- break;
- case 0x021E:
- bufpush(0x021F);
- break;
- case 0x0220:
- bufpush(0x019E);
- break;
- case 0x0222:
- bufpush(0x0223);
- break;
- case 0x0224:
- bufpush(0x0225);
- break;
- case 0x0226:
- bufpush(0x0227);
- break;
- case 0x0228:
- bufpush(0x0229);
- break;
- case 0x022A:
- bufpush(0x022B);
- break;
- case 0x022C:
- bufpush(0x022D);
- break;
- case 0x022E:
- bufpush(0x022F);
- break;
- case 0x0230:
- bufpush(0x0231);
- break;
- case 0x0232:
- bufpush(0x0233);
- break;
- case 0x0345:
- bufpush(0x03B9);
- break;
- case 0x0386:
- bufpush(0x03AC);
- break;
- case 0x0388:
- bufpush(0x03AD);
- break;
- case 0x0389:
- bufpush(0x03AE);
- break;
- case 0x038A:
- bufpush(0x03AF);
- break;
- case 0x038C:
- bufpush(0x03CC);
- break;
- case 0x038E:
- bufpush(0x03CD);
- break;
- case 0x038F:
- bufpush(0x03CE);
- break;
- case 0x0390:
- bufpush(0x03B9);
- bufpush(0x0308);
- bufpush(0x0301);
- break;
- case 0x0391:
- bufpush(0x03B1);
- break;
- case 0x0392:
- bufpush(0x03B2);
- break;
- case 0x0393:
- bufpush(0x03B3);
- break;
- case 0x0394:
- bufpush(0x03B4);
- break;
- case 0x0395:
- bufpush(0x03B5);
- break;
- case 0x0396:
- bufpush(0x03B6);
- break;
- case 0x0397:
- bufpush(0x03B7);
- break;
- case 0x0398:
- bufpush(0x03B8);
- break;
- case 0x0399:
- bufpush(0x03B9);
- break;
- case 0x039A:
- bufpush(0x03BA);
- break;
- case 0x039B:
- bufpush(0x03BB);
- break;
- case 0x039C:
- bufpush(0x03BC);
- break;
- case 0x039D:
- bufpush(0x03BD);
- break;
- case 0x039E:
- bufpush(0x03BE);
- break;
- case 0x039F:
- bufpush(0x03BF);
- break;
- case 0x03A0:
- bufpush(0x03C0);
- break;
- case 0x03A1:
- bufpush(0x03C1);
- break;
- case 0x03A3:
- bufpush(0x03C3);
- break;
- case 0x03A4:
- bufpush(0x03C4);
- break;
- case 0x03A5:
- bufpush(0x03C5);
- break;
- case 0x03A6:
- bufpush(0x03C6);
- break;
- case 0x03A7:
- bufpush(0x03C7);
- break;
- case 0x03A8:
- bufpush(0x03C8);
- break;
- case 0x03A9:
- bufpush(0x03C9);
- break;
- case 0x03AA:
- bufpush(0x03CA);
- break;
- case 0x03AB:
- bufpush(0x03CB);
- break;
- case 0x03B0:
- bufpush(0x03C5);
- bufpush(0x0308);
- bufpush(0x0301);
- break;
- case 0x03C2:
- bufpush(0x03C3);
- break;
- case 0x03D0:
- bufpush(0x03B2);
- break;
- case 0x03D1:
- bufpush(0x03B8);
- break;
- case 0x03D5:
- bufpush(0x03C6);
- break;
- case 0x03D6:
- bufpush(0x03C0);
- break;
- case 0x03D8:
- bufpush(0x03D9);
- break;
- case 0x03DA:
- bufpush(0x03DB);
- break;
- case 0x03DC:
- bufpush(0x03DD);
- break;
- case 0x03DE:
- bufpush(0x03DF);
- break;
- case 0x03E0:
- bufpush(0x03E1);
- break;
- case 0x03E2:
- bufpush(0x03E3);
- break;
- case 0x03E4:
- bufpush(0x03E5);
- break;
- case 0x03E6:
- bufpush(0x03E7);
- break;
- case 0x03E8:
- bufpush(0x03E9);
- break;
- case 0x03EA:
- bufpush(0x03EB);
- break;
- case 0x03EC:
- bufpush(0x03ED);
- break;
- case 0x03EE:
- bufpush(0x03EF);
- break;
- case 0x03F0:
- bufpush(0x03BA);
- break;
- case 0x03F1:
- bufpush(0x03C1);
- break;
- case 0x03F2:
- bufpush(0x03C3);
- break;
- case 0x03F4:
- bufpush(0x03B8);
- break;
- case 0x03F5:
- bufpush(0x03B5);
- break;
- case 0x0400:
- bufpush(0x0450);
- break;
- case 0x0401:
- bufpush(0x0451);
- break;
- case 0x0402:
- bufpush(0x0452);
- break;
- case 0x0403:
- bufpush(0x0453);
- break;
- case 0x0404:
- bufpush(0x0454);
- break;
- case 0x0405:
- bufpush(0x0455);
- break;
- case 0x0406:
- bufpush(0x0456);
- break;
- case 0x0407:
- bufpush(0x0457);
- break;
- case 0x0408:
- bufpush(0x0458);
- break;
- case 0x0409:
- bufpush(0x0459);
- break;
- case 0x040A:
- bufpush(0x045A);
- break;
- case 0x040B:
- bufpush(0x045B);
- break;
- case 0x040C:
- bufpush(0x045C);
- break;
- case 0x040D:
- bufpush(0x045D);
- break;
- case 0x040E:
- bufpush(0x045E);
- break;
- case 0x040F:
- bufpush(0x045F);
- break;
- case 0x0410:
- bufpush(0x0430);
- break;
- case 0x0411:
- bufpush(0x0431);
- break;
- case 0x0412:
- bufpush(0x0432);
- break;
- case 0x0413:
- bufpush(0x0433);
- break;
- case 0x0414:
- bufpush(0x0434);
- break;
- case 0x0415:
- bufpush(0x0435);
- break;
- case 0x0416:
- bufpush(0x0436);
- break;
- case 0x0417:
- bufpush(0x0437);
- break;
- case 0x0418:
- bufpush(0x0438);
- break;
- case 0x0419:
- bufpush(0x0439);
- break;
- case 0x041A:
- bufpush(0x043A);
- break;
- case 0x041B:
- bufpush(0x043B);
- break;
- case 0x041C:
- bufpush(0x043C);
- break;
- case 0x041D:
- bufpush(0x043D);
- break;
- case 0x041E:
- bufpush(0x043E);
- break;
- case 0x041F:
- bufpush(0x043F);
- break;
- case 0x0420:
- bufpush(0x0440);
- break;
- case 0x0421:
- bufpush(0x0441);
- break;
- case 0x0422:
- bufpush(0x0442);
- break;
- case 0x0423:
- bufpush(0x0443);
- break;
- case 0x0424:
- bufpush(0x0444);
- break;
- case 0x0425:
- bufpush(0x0445);
- break;
- case 0x0426:
- bufpush(0x0446);
- break;
- case 0x0427:
- bufpush(0x0447);
- break;
- case 0x0428:
- bufpush(0x0448);
- break;
- case 0x0429:
- bufpush(0x0449);
- break;
- case 0x042A:
- bufpush(0x044A);
- break;
- case 0x042B:
- bufpush(0x044B);
- break;
- case 0x042C:
- bufpush(0x044C);
- break;
- case 0x042D:
- bufpush(0x044D);
- break;
- case 0x042E:
- bufpush(0x044E);
- break;
- case 0x042F:
- bufpush(0x044F);
- break;
- case 0x0460:
- bufpush(0x0461);
- break;
- case 0x0462:
- bufpush(0x0463);
- break;
- case 0x0464:
- bufpush(0x0465);
- break;
- case 0x0466:
- bufpush(0x0467);
- break;
- case 0x0468:
- bufpush(0x0469);
- break;
- case 0x046A:
- bufpush(0x046B);
- break;
- case 0x046C:
- bufpush(0x046D);
- break;
- case 0x046E:
- bufpush(0x046F);
- break;
- case 0x0470:
- bufpush(0x0471);
- break;
- case 0x0472:
- bufpush(0x0473);
- break;
- case 0x0474:
- bufpush(0x0475);
- break;
- case 0x0476:
- bufpush(0x0477);
- break;
- case 0x0478:
- bufpush(0x0479);
- break;
- case 0x047A:
- bufpush(0x047B);
- break;
- case 0x047C:
- bufpush(0x047D);
- break;
- case 0x047E:
- bufpush(0x047F);
- break;
- case 0x0480:
- bufpush(0x0481);
- break;
- case 0x048A:
- bufpush(0x048B);
- break;
- case 0x048C:
- bufpush(0x048D);
- break;
- case 0x048E:
- bufpush(0x048F);
- break;
- case 0x0490:
- bufpush(0x0491);
- break;
- case 0x0492:
- bufpush(0x0493);
- break;
- case 0x0494:
- bufpush(0x0495);
- break;
- case 0x0496:
- bufpush(0x0497);
- break;
- case 0x0498:
- bufpush(0x0499);
- break;
- case 0x049A:
- bufpush(0x049B);
- break;
- case 0x049C:
- bufpush(0x049D);
- break;
- case 0x049E:
- bufpush(0x049F);
- break;
- case 0x04A0:
- bufpush(0x04A1);
- break;
- case 0x04A2:
- bufpush(0x04A3);
- break;
- case 0x04A4:
- bufpush(0x04A5);
- break;
- case 0x04A6:
- bufpush(0x04A7);
- break;
- case 0x04A8:
- bufpush(0x04A9);
- break;
- case 0x04AA:
- bufpush(0x04AB);
- break;
- case 0x04AC:
- bufpush(0x04AD);
- break;
- case 0x04AE:
- bufpush(0x04AF);
- break;
- case 0x04B0:
- bufpush(0x04B1);
- break;
- case 0x04B2:
- bufpush(0x04B3);
- break;
- case 0x04B4:
- bufpush(0x04B5);
- break;
- case 0x04B6:
- bufpush(0x04B7);
- break;
- case 0x04B8:
- bufpush(0x04B9);
- break;
- case 0x04BA:
- bufpush(0x04BB);
- break;
- case 0x04BC:
- bufpush(0x04BD);
- break;
- case 0x04BE:
- bufpush(0x04BF);
- break;
- case 0x04C1:
- bufpush(0x04C2);
- break;
- case 0x04C3:
- bufpush(0x04C4);
- break;
- case 0x04C5:
- bufpush(0x04C6);
- break;
- case 0x04C7:
- bufpush(0x04C8);
- break;
- case 0x04C9:
- bufpush(0x04CA);
- break;
- case 0x04CB:
- bufpush(0x04CC);
- break;
- case 0x04CD:
- bufpush(0x04CE);
- break;
- case 0x04D0:
- bufpush(0x04D1);
- break;
- case 0x04D2:
- bufpush(0x04D3);
- break;
- case 0x04D4:
- bufpush(0x04D5);
- break;
- case 0x04D6:
- bufpush(0x04D7);
- break;
- case 0x04D8:
- bufpush(0x04D9);
- break;
- case 0x04DA:
- bufpush(0x04DB);
- break;
- case 0x04DC:
- bufpush(0x04DD);
- break;
- case 0x04DE:
- bufpush(0x04DF);
- break;
- case 0x04E0:
- bufpush(0x04E1);
- break;
- case 0x04E2:
- bufpush(0x04E3);
- break;
- case 0x04E4:
- bufpush(0x04E5);
- break;
- case 0x04E6:
- bufpush(0x04E7);
- break;
- case 0x04E8:
- bufpush(0x04E9);
- break;
- case 0x04EA:
- bufpush(0x04EB);
- break;
- case 0x04EC:
- bufpush(0x04ED);
- break;
- case 0x04EE:
- bufpush(0x04EF);
- break;
- case 0x04F0:
- bufpush(0x04F1);
- break;
- case 0x04F2:
- bufpush(0x04F3);
- break;
- case 0x04F4:
- bufpush(0x04F5);
- break;
- case 0x04F8:
- bufpush(0x04F9);
- break;
- case 0x0500:
- bufpush(0x0501);
- break;
- case 0x0502:
- bufpush(0x0503);
- break;
- case 0x0504:
- bufpush(0x0505);
- break;
- case 0x0506:
- bufpush(0x0507);
- break;
- case 0x0508:
- bufpush(0x0509);
- break;
- case 0x050A:
- bufpush(0x050B);
- break;
- case 0x050C:
- bufpush(0x050D);
- break;
- case 0x050E:
- bufpush(0x050F);
- break;
- case 0x0531:
- bufpush(0x0561);
- break;
- case 0x0532:
- bufpush(0x0562);
- break;
- case 0x0533:
- bufpush(0x0563);
- break;
- case 0x0534:
- bufpush(0x0564);
- break;
- case 0x0535:
- bufpush(0x0565);
- break;
- case 0x0536:
- bufpush(0x0566);
- break;
- case 0x0537:
- bufpush(0x0567);
- break;
- case 0x0538:
- bufpush(0x0568);
- break;
- case 0x0539:
- bufpush(0x0569);
- break;
- case 0x053A:
- bufpush(0x056A);
- break;
- case 0x053B:
- bufpush(0x056B);
- break;
- case 0x053C:
- bufpush(0x056C);
- break;
- case 0x053D:
- bufpush(0x056D);
- break;
- case 0x053E:
- bufpush(0x056E);
- break;
- case 0x053F:
- bufpush(0x056F);
- break;
- case 0x0540:
- bufpush(0x0570);
- break;
- case 0x0541:
- bufpush(0x0571);
- break;
- case 0x0542:
- bufpush(0x0572);
- break;
- case 0x0543:
- bufpush(0x0573);
- break;
- case 0x0544:
- bufpush(0x0574);
- break;
- case 0x0545:
- bufpush(0x0575);
- break;
- case 0x0546:
- bufpush(0x0576);
- break;
- case 0x0547:
- bufpush(0x0577);
- break;
- case 0x0548:
- bufpush(0x0578);
- break;
- case 0x0549:
- bufpush(0x0579);
- break;
- case 0x054A:
- bufpush(0x057A);
- break;
- case 0x054B:
- bufpush(0x057B);
- break;
- case 0x054C:
- bufpush(0x057C);
- break;
- case 0x054D:
- bufpush(0x057D);
- break;
- case 0x054E:
- bufpush(0x057E);
- break;
- case 0x054F:
- bufpush(0x057F);
- break;
- case 0x0550:
- bufpush(0x0580);
- break;
- case 0x0551:
- bufpush(0x0581);
- break;
- case 0x0552:
- bufpush(0x0582);
- break;
- case 0x0553:
- bufpush(0x0583);
- break;
- case 0x0554:
- bufpush(0x0584);
- break;
- case 0x0555:
- bufpush(0x0585);
- break;
- case 0x0556:
- bufpush(0x0586);
- break;
- case 0x0587:
- bufpush(0x0565);
- bufpush(0x0582);
- break;
- case 0x1E00:
- bufpush(0x1E01);
- break;
- case 0x1E02:
- bufpush(0x1E03);
- break;
- case 0x1E04:
- bufpush(0x1E05);
- break;
- case 0x1E06:
- bufpush(0x1E07);
- break;
- case 0x1E08:
- bufpush(0x1E09);
- break;
- case 0x1E0A:
- bufpush(0x1E0B);
- break;
- case 0x1E0C:
- bufpush(0x1E0D);
- break;
- case 0x1E0E:
- bufpush(0x1E0F);
- break;
- case 0x1E10:
- bufpush(0x1E11);
- break;
- case 0x1E12:
- bufpush(0x1E13);
- break;
- case 0x1E14:
- bufpush(0x1E15);
- break;
- case 0x1E16:
- bufpush(0x1E17);
- break;
- case 0x1E18:
- bufpush(0x1E19);
- break;
- case 0x1E1A:
- bufpush(0x1E1B);
- break;
- case 0x1E1C:
- bufpush(0x1E1D);
- break;
- case 0x1E1E:
- bufpush(0x1E1F);
- break;
- case 0x1E20:
- bufpush(0x1E21);
- break;
- case 0x1E22:
- bufpush(0x1E23);
- break;
- case 0x1E24:
- bufpush(0x1E25);
- break;
- case 0x1E26:
- bufpush(0x1E27);
- break;
- case 0x1E28:
- bufpush(0x1E29);
- break;
- case 0x1E2A:
- bufpush(0x1E2B);
- break;
- case 0x1E2C:
- bufpush(0x1E2D);
- break;
- case 0x1E2E:
- bufpush(0x1E2F);
- break;
- case 0x1E30:
- bufpush(0x1E31);
- break;
- case 0x1E32:
- bufpush(0x1E33);
- break;
- case 0x1E34:
- bufpush(0x1E35);
- break;
- case 0x1E36:
- bufpush(0x1E37);
- break;
- case 0x1E38:
- bufpush(0x1E39);
- break;
- case 0x1E3A:
- bufpush(0x1E3B);
- break;
- case 0x1E3C:
- bufpush(0x1E3D);
- break;
- case 0x1E3E:
- bufpush(0x1E3F);
- break;
- case 0x1E40:
- bufpush(0x1E41);
- break;
- case 0x1E42:
- bufpush(0x1E43);
- break;
- case 0x1E44:
- bufpush(0x1E45);
- break;
- case 0x1E46:
- bufpush(0x1E47);
- break;
- case 0x1E48:
- bufpush(0x1E49);
- break;
- case 0x1E4A:
- bufpush(0x1E4B);
- break;
- case 0x1E4C:
- bufpush(0x1E4D);
- break;
- case 0x1E4E:
- bufpush(0x1E4F);
- break;
- case 0x1E50:
- bufpush(0x1E51);
- break;
- case 0x1E52:
- bufpush(0x1E53);
- break;
- case 0x1E54:
- bufpush(0x1E55);
- break;
- case 0x1E56:
- bufpush(0x1E57);
- break;
- case 0x1E58:
- bufpush(0x1E59);
- break;
- case 0x1E5A:
- bufpush(0x1E5B);
- break;
- case 0x1E5C:
- bufpush(0x1E5D);
- break;
- case 0x1E5E:
- bufpush(0x1E5F);
- break;
- case 0x1E60:
- bufpush(0x1E61);
- break;
- case 0x1E62:
- bufpush(0x1E63);
- break;
- case 0x1E64:
- bufpush(0x1E65);
- break;
- case 0x1E66:
- bufpush(0x1E67);
- break;
- case 0x1E68:
- bufpush(0x1E69);
- break;
- case 0x1E6A:
- bufpush(0x1E6B);
- break;
- case 0x1E6C:
- bufpush(0x1E6D);
- break;
- case 0x1E6E:
- bufpush(0x1E6F);
- break;
- case 0x1E70:
- bufpush(0x1E71);
- break;
- case 0x1E72:
- bufpush(0x1E73);
- break;
- case 0x1E74:
- bufpush(0x1E75);
- break;
- case 0x1E76:
- bufpush(0x1E77);
- break;
- case 0x1E78:
- bufpush(0x1E79);
- break;
- case 0x1E7A:
- bufpush(0x1E7B);
- break;
- case 0x1E7C:
- bufpush(0x1E7D);
- break;
- case 0x1E7E:
- bufpush(0x1E7F);
- break;
- case 0x1E80:
- bufpush(0x1E81);
- break;
- case 0x1E82:
- bufpush(0x1E83);
- break;
- case 0x1E84:
- bufpush(0x1E85);
- break;
- case 0x1E86:
- bufpush(0x1E87);
- break;
- case 0x1E88:
- bufpush(0x1E89);
- break;
- case 0x1E8A:
- bufpush(0x1E8B);
- break;
- case 0x1E8C:
- bufpush(0x1E8D);
- break;
- case 0x1E8E:
- bufpush(0x1E8F);
- break;
- case 0x1E90:
- bufpush(0x1E91);
- break;
- case 0x1E92:
- bufpush(0x1E93);
- break;
- case 0x1E94:
- bufpush(0x1E95);
- break;
- case 0x1E96:
- bufpush(0x0068);
- bufpush(0x0331);
- break;
- case 0x1E97:
- bufpush(0x0074);
- bufpush(0x0308);
- break;
- case 0x1E98:
- bufpush(0x0077);
- bufpush(0x030A);
- break;
- case 0x1E99:
- bufpush(0x0079);
- bufpush(0x030A);
- break;
- case 0x1E9A:
- bufpush(0x0061);
- bufpush(0x02BE);
- break;
- case 0x1E9B:
- bufpush(0x1E61);
- break;
- case 0x1EA0:
- bufpush(0x1EA1);
- break;
- case 0x1EA2:
- bufpush(0x1EA3);
- break;
- case 0x1EA4:
- bufpush(0x1EA5);
- break;
- case 0x1EA6:
- bufpush(0x1EA7);
- break;
- case 0x1EA8:
- bufpush(0x1EA9);
- break;
- case 0x1EAA:
- bufpush(0x1EAB);
- break;
- case 0x1EAC:
- bufpush(0x1EAD);
- break;
- case 0x1EAE:
- bufpush(0x1EAF);
- break;
- case 0x1EB0:
- bufpush(0x1EB1);
- break;
- case 0x1EB2:
- bufpush(0x1EB3);
- break;
- case 0x1EB4:
- bufpush(0x1EB5);
- break;
- case 0x1EB6:
- bufpush(0x1EB7);
- break;
- case 0x1EB8:
- bufpush(0x1EB9);
- break;
- case 0x1EBA:
- bufpush(0x1EBB);
- break;
- case 0x1EBC:
- bufpush(0x1EBD);
- break;
- case 0x1EBE:
- bufpush(0x1EBF);
- break;
- case 0x1EC0:
- bufpush(0x1EC1);
- break;
- case 0x1EC2:
- bufpush(0x1EC3);
- break;
- case 0x1EC4:
- bufpush(0x1EC5);
- break;
- case 0x1EC6:
- bufpush(0x1EC7);
- break;
- case 0x1EC8:
- bufpush(0x1EC9);
- break;
- case 0x1ECA:
- bufpush(0x1ECB);
- break;
- case 0x1ECC:
- bufpush(0x1ECD);
- break;
- case 0x1ECE:
- bufpush(0x1ECF);
- break;
- case 0x1ED0:
- bufpush(0x1ED1);
- break;
- case 0x1ED2:
- bufpush(0x1ED3);
- break;
- case 0x1ED4:
- bufpush(0x1ED5);
- break;
- case 0x1ED6:
- bufpush(0x1ED7);
- break;
- case 0x1ED8:
- bufpush(0x1ED9);
- break;
- case 0x1EDA:
- bufpush(0x1EDB);
- break;
- case 0x1EDC:
- bufpush(0x1EDD);
- break;
- case 0x1EDE:
- bufpush(0x1EDF);
- break;
- case 0x1EE0:
- bufpush(0x1EE1);
- break;
- case 0x1EE2:
- bufpush(0x1EE3);
- break;
- case 0x1EE4:
- bufpush(0x1EE5);
- break;
- case 0x1EE6:
- bufpush(0x1EE7);
- break;
- case 0x1EE8:
- bufpush(0x1EE9);
- break;
- case 0x1EEA:
- bufpush(0x1EEB);
- break;
- case 0x1EEC:
- bufpush(0x1EED);
- break;
- case 0x1EEE:
- bufpush(0x1EEF);
- break;
- case 0x1EF0:
- bufpush(0x1EF1);
- break;
- case 0x1EF2:
- bufpush(0x1EF3);
- break;
- case 0x1EF4:
- bufpush(0x1EF5);
- break;
- case 0x1EF6:
- bufpush(0x1EF7);
- break;
- case 0x1EF8:
- bufpush(0x1EF9);
- break;
- case 0x1F08:
- bufpush(0x1F00);
- break;
- case 0x1F09:
- bufpush(0x1F01);
- break;
- case 0x1F0A:
- bufpush(0x1F02);
- break;
- case 0x1F0B:
- bufpush(0x1F03);
- break;
- case 0x1F0C:
- bufpush(0x1F04);
- break;
- case 0x1F0D:
- bufpush(0x1F05);
- break;
- case 0x1F0E:
- bufpush(0x1F06);
- break;
- case 0x1F0F:
- bufpush(0x1F07);
- break;
- case 0x1F18:
- bufpush(0x1F10);
- break;
- case 0x1F19:
- bufpush(0x1F11);
- break;
- case 0x1F1A:
- bufpush(0x1F12);
- break;
- case 0x1F1B:
- bufpush(0x1F13);
- break;
- case 0x1F1C:
- bufpush(0x1F14);
- break;
- case 0x1F1D:
- bufpush(0x1F15);
- break;
- case 0x1F28:
- bufpush(0x1F20);
- break;
- case 0x1F29:
- bufpush(0x1F21);
- break;
- case 0x1F2A:
- bufpush(0x1F22);
- break;
- case 0x1F2B:
- bufpush(0x1F23);
- break;
- case 0x1F2C:
- bufpush(0x1F24);
- break;
- case 0x1F2D:
- bufpush(0x1F25);
- break;
- case 0x1F2E:
- bufpush(0x1F26);
- break;
- case 0x1F2F:
- bufpush(0x1F27);
- break;
- case 0x1F38:
- bufpush(0x1F30);
- break;
- case 0x1F39:
- bufpush(0x1F31);
- break;
- case 0x1F3A:
- bufpush(0x1F32);
- break;
- case 0x1F3B:
- bufpush(0x1F33);
- break;
- case 0x1F3C:
- bufpush(0x1F34);
- break;
- case 0x1F3D:
- bufpush(0x1F35);
- break;
- case 0x1F3E:
- bufpush(0x1F36);
- break;
- case 0x1F3F:
- bufpush(0x1F37);
- break;
- case 0x1F48:
- bufpush(0x1F40);
- break;
- case 0x1F49:
- bufpush(0x1F41);
- break;
- case 0x1F4A:
- bufpush(0x1F42);
- break;
- case 0x1F4B:
- bufpush(0x1F43);
- break;
- case 0x1F4C:
- bufpush(0x1F44);
- break;
- case 0x1F4D:
- bufpush(0x1F45);
- break;
- case 0x1F50:
- bufpush(0x03C5);
- bufpush(0x0313);
- break;
- case 0x1F52:
- bufpush(0x03C5);
- bufpush(0x0313);
- bufpush(0x0300);
- break;
- case 0x1F54:
- bufpush(0x03C5);
- bufpush(0x0313);
- bufpush(0x0301);
- break;
- case 0x1F56:
- bufpush(0x03C5);
- bufpush(0x0313);
- bufpush(0x0342);
- break;
- case 0x1F59:
- bufpush(0x1F51);
- break;
- case 0x1F5B:
- bufpush(0x1F53);
- break;
- case 0x1F5D:
- bufpush(0x1F55);
- break;
- case 0x1F5F:
- bufpush(0x1F57);
- break;
- case 0x1F68:
- bufpush(0x1F60);
- break;
- case 0x1F69:
- bufpush(0x1F61);
- break;
- case 0x1F6A:
- bufpush(0x1F62);
- break;
- case 0x1F6B:
- bufpush(0x1F63);
- break;
- case 0x1F6C:
- bufpush(0x1F64);
- break;
- case 0x1F6D:
- bufpush(0x1F65);
- break;
- case 0x1F6E:
- bufpush(0x1F66);
- break;
- case 0x1F6F:
- bufpush(0x1F67);
- break;
- case 0x1F80:
- bufpush(0x1F00);
- bufpush(0x03B9);
- break;
- case 0x1F81:
- bufpush(0x1F01);
- bufpush(0x03B9);
- break;
- case 0x1F82:
- bufpush(0x1F02);
- bufpush(0x03B9);
- break;
- case 0x1F83:
- bufpush(0x1F03);
- bufpush(0x03B9);
- break;
- case 0x1F84:
- bufpush(0x1F04);
- bufpush(0x03B9);
- break;
- case 0x1F85:
- bufpush(0x1F05);
- bufpush(0x03B9);
- break;
- case 0x1F86:
- bufpush(0x1F06);
- bufpush(0x03B9);
- break;
- case 0x1F87:
- bufpush(0x1F07);
- bufpush(0x03B9);
- break;
- case 0x1F88:
- bufpush(0x1F00);
- bufpush(0x03B9);
- break;
- case 0x:
- break;
- case 0x1F89:
- bufpush(0x1F01);
- bufpush(0x03B9);
- break;
- case 0x:
- break;
- case 0x1F8A:
- bufpush(0x1F02);
- bufpush(0x03B9);
- break;
- case 0x:
- break;
- case 0x1F8B:
- bufpush(0x1F03);
- bufpush(0x03B9);
- break;
- case 0x:
- break;
- case 0x1F8C:
- bufpush(0x1F04);
- bufpush(0x03B9);
- break;
- case 0x:
- break;
- case 0x1F8D:
- bufpush(0x1F05);
- bufpush(0x03B9);
- break;
- case 0x:
- break;
- case 0x1F8E:
- bufpush(0x1F06);
- bufpush(0x03B9);
- break;
- case 0x:
- break;
- case 0x1F8F:
- bufpush(0x1F07);
- bufpush(0x03B9);
- break;
- case 0x:
- break;
- case 0x1F90:
- bufpush(0x1F20);
- bufpush(0x03B9);
- break;
- case 0x1F91:
- bufpush(0x1F21);
- bufpush(0x03B9);
- break;
- case 0x1F92:
- bufpush(0x1F22);
- bufpush(0x03B9);
- break;
- case 0x1F93:
- bufpush(0x1F23);
- bufpush(0x03B9);
- break;
- case 0x1F94:
- bufpush(0x1F24);
- bufpush(0x03B9);
- break;
- case 0x1F95:
- bufpush(0x1F25);
- bufpush(0x03B9);
- break;
- case 0x1F96:
- bufpush(0x1F26);
- bufpush(0x03B9);
- break;
- case 0x1F97:
- bufpush(0x1F27);
- bufpush(0x03B9);
- break;
- case 0x1F98:
- bufpush(0x1F20);
- bufpush(0x03B9);
- break;
- case 0x:
- break;
- case 0x1F99:
- bufpush(0x1F21);
- bufpush(0x03B9);
- break;
- case 0x:
- break;
- case 0x1F9A:
- bufpush(0x1F22);
- bufpush(0x03B9);
- break;
- case 0x:
- break;
- case 0x1F9B:
- bufpush(0x1F23);
- bufpush(0x03B9);
- break;
- case 0x:
- break;
- case 0x1F9C:
- bufpush(0x1F24);
- bufpush(0x03B9);
- break;
- case 0x:
- break;
- case 0x1F9D:
- bufpush(0x1F25);
- bufpush(0x03B9);
- break;
- case 0x:
- break;
- case 0x1F9E:
- bufpush(0x1F26);
- bufpush(0x03B9);
- break;
- case 0x:
- break;
- case 0x1F9F:
- bufpush(0x1F27);
- bufpush(0x03B9);
- break;
- case 0x:
- break;
- case 0x1FA0:
- bufpush(0x1F60);
- bufpush(0x03B9);
- break;
- case 0x1FA1:
- bufpush(0x1F61);
- bufpush(0x03B9);
- break;
- case 0x1FA2:
- bufpush(0x1F62);
- bufpush(0x03B9);
- break;
- case 0x1FA3:
- bufpush(0x1F63);
- bufpush(0x03B9);
- break;
- case 0x1FA4:
- bufpush(0x1F64);
- bufpush(0x03B9);
- break;
- case 0x1FA5:
- bufpush(0x1F65);
- bufpush(0x03B9);
- break;
- case 0x1FA6:
- bufpush(0x1F66);
- bufpush(0x03B9);
- break;
- case 0x1FA7:
- bufpush(0x1F67);
- bufpush(0x03B9);
- break;
- case 0x1FA8:
- bufpush(0x1F60);
- bufpush(0x03B9);
- break;
- case 0x:
- break;
- case 0x1FA9:
- bufpush(0x1F61);
- bufpush(0x03B9);
- break;
- case 0x:
- break;
- case 0x1FAA:
- bufpush(0x1F62);
- bufpush(0x03B9);
- break;
- case 0x:
- break;
- case 0x1FAB:
- bufpush(0x1F63);
- bufpush(0x03B9);
- break;
- case 0x:
- break;
- case 0x1FAC:
- bufpush(0x1F64);
- bufpush(0x03B9);
- break;
- case 0x:
- break;
- case 0x1FAD:
- bufpush(0x1F65);
- bufpush(0x03B9);
- break;
- case 0x:
- break;
- case 0x1FAE:
- bufpush(0x1F66);
- bufpush(0x03B9);
- break;
- case 0x:
- break;
- case 0x1FAF:
- bufpush(0x1F67);
- bufpush(0x03B9);
- break;
- case 0x:
- break;
- case 0x1FB2:
- bufpush(0x1F70);
- bufpush(0x03B9);
- break;
- case 0x1FB3:
- bufpush(0x03B1);
- bufpush(0x03B9);
- break;
- case 0x1FB4:
- bufpush(0x03AC);
- bufpush(0x03B9);
- break;
- case 0x1FB6:
- bufpush(0x03B1);
- bufpush(0x0342);
- break;
- case 0x1FB7:
- bufpush(0x03B1);
- bufpush(0x0342);
- bufpush(0x03B9);
- break;
- case 0x1FB8:
- bufpush(0x1FB0);
- break;
- case 0x1FB9:
- bufpush(0x1FB1);
- break;
- case 0x1FBA:
- bufpush(0x1F70);
- break;
- case 0x1FBB:
- bufpush(0x1F71);
- break;
- case 0x1FBC:
- bufpush(0x03B1);
- bufpush(0x03B9);
- break;
- case 0x:
- break;
- case 0x1FBE:
- bufpush(0x03B9);
- break;
- case 0x1FC2:
- bufpush(0x1F74);
- bufpush(0x03B9);
- break;
- case 0x1FC3:
- bufpush(0x03B7);
- bufpush(0x03B9);
- break;
- case 0x1FC4:
- bufpush(0x03AE);
- bufpush(0x03B9);
- break;
- case 0x1FC6:
- bufpush(0x03B7);
- bufpush(0x0342);
- break;
- case 0x1FC7:
- bufpush(0x03B7);
- bufpush(0x0342);
- bufpush(0x03B9);
- break;
- case 0x1FC8:
- bufpush(0x1F72);
- break;
- case 0x1FC9:
- bufpush(0x1F73);
- break;
- case 0x1FCA:
- bufpush(0x1F74);
- break;
- case 0x1FCB:
- bufpush(0x1F75);
- break;
- case 0x1FCC:
- bufpush(0x03B7);
- bufpush(0x03B9);
- break;
- case 0x:
- break;
- case 0x1FD2:
- bufpush(0x03B9);
- bufpush(0x0308);
- bufpush(0x0300);
- break;
- case 0x1FD3:
- bufpush(0x03B9);
- bufpush(0x0308);
- bufpush(0x0301);
- break;
- case 0x1FD6:
- bufpush(0x03B9);
- bufpush(0x0342);
- break;
- case 0x1FD7:
- bufpush(0x03B9);
- bufpush(0x0308);
- bufpush(0x0342);
- break;
- case 0x1FD8:
- bufpush(0x1FD0);
- break;
- case 0x1FD9:
- bufpush(0x1FD1);
- break;
- case 0x1FDA:
- bufpush(0x1F76);
- break;
- case 0x1FDB:
- bufpush(0x1F77);
- break;
- case 0x1FE2:
- bufpush(0x03C5);
- bufpush(0x0308);
- bufpush(0x0300);
- break;
- case 0x1FE3:
- bufpush(0x03C5);
- bufpush(0x0308);
- bufpush(0x0301);
- break;
- case 0x1FE4:
- bufpush(0x03C1);
- bufpush(0x0313);
- break;
- case 0x1FE6:
- bufpush(0x03C5);
- bufpush(0x0342);
- break;
- case 0x1FE7:
- bufpush(0x03C5);
- bufpush(0x0308);
- bufpush(0x0342);
- break;
- case 0x1FE8:
- bufpush(0x1FE0);
- break;
- case 0x1FE9:
- bufpush(0x1FE1);
- break;
- case 0x1FEA:
- bufpush(0x1F7A);
- break;
- case 0x1FEB:
- bufpush(0x1F7B);
- break;
- case 0x1FEC:
- bufpush(0x1FE5);
- break;
- case 0x1FF2:
- bufpush(0x1F7C);
- bufpush(0x03B9);
- break;
- case 0x1FF3:
- bufpush(0x03C9);
- bufpush(0x03B9);
- break;
- case 0x1FF4:
- bufpush(0x03CE);
- bufpush(0x03B9);
- break;
- case 0x1FF6:
- bufpush(0x03C9);
- bufpush(0x0342);
- break;
- case 0x1FF7:
- bufpush(0x03C9);
- bufpush(0x0342);
- bufpush(0x03B9);
- break;
- case 0x1FF8:
- bufpush(0x1F78);
- break;
- case 0x1FF9:
- bufpush(0x1F79);
- break;
- case 0x1FFA:
- bufpush(0x1F7C);
- break;
- case 0x1FFB:
- bufpush(0x1F7D);
- break;
- case 0x1FFC:
- bufpush(0x03C9);
- bufpush(0x03B9);
- break;
- case 0x:
- break;
- case 0x2126:
- bufpush(0x03C9);
- break;
- case 0x212A:
- bufpush(0x006B);
- break;
- case 0x212B:
- bufpush(0x00E5);
- break;
- case 0x2160:
- bufpush(0x2170);
- break;
- case 0x2161:
- bufpush(0x2171);
- break;
- case 0x2162:
- bufpush(0x2172);
- break;
- case 0x2163:
- bufpush(0x2173);
- break;
- case 0x2164:
- bufpush(0x2174);
- break;
- case 0x2165:
- bufpush(0x2175);
- break;
- case 0x2166:
- bufpush(0x2176);
- break;
- case 0x2167:
- bufpush(0x2177);
- break;
- case 0x2168:
- bufpush(0x2178);
- break;
- case 0x2169:
- bufpush(0x2179);
- break;
- case 0x216A:
- bufpush(0x217A);
- break;
- case 0x216B:
- bufpush(0x217B);
- break;
- case 0x216C:
- bufpush(0x217C);
- break;
- case 0x216D:
- bufpush(0x217D);
- break;
- case 0x216E:
- bufpush(0x217E);
- break;
- case 0x216F:
- bufpush(0x217F);
- break;
- case 0x24B6:
- bufpush(0x24D0);
- break;
- case 0x24B7:
- bufpush(0x24D1);
- break;
- case 0x24B8:
- bufpush(0x24D2);
- break;
- case 0x24B9:
- bufpush(0x24D3);
- break;
- case 0x24BA:
- bufpush(0x24D4);
- break;
- case 0x24BB:
- bufpush(0x24D5);
- break;
- case 0x24BC:
- bufpush(0x24D6);
- break;
- case 0x24BD:
- bufpush(0x24D7);
- break;
- case 0x24BE:
- bufpush(0x24D8);
- break;
- case 0x24BF:
- bufpush(0x24D9);
- break;
- case 0x24C0:
- bufpush(0x24DA);
- break;
- case 0x24C1:
- bufpush(0x24DB);
- break;
- case 0x24C2:
- bufpush(0x24DC);
- break;
- case 0x24C3:
- bufpush(0x24DD);
- break;
- case 0x24C4:
- bufpush(0x24DE);
- break;
- case 0x24C5:
- bufpush(0x24DF);
- break;
- case 0x24C6:
- bufpush(0x24E0);
- break;
- case 0x24C7:
- bufpush(0x24E1);
- break;
- case 0x24C8:
- bufpush(0x24E2);
- break;
- case 0x24C9:
- bufpush(0x24E3);
- break;
- case 0x24CA:
- bufpush(0x24E4);
- break;
- case 0x24CB:
- bufpush(0x24E5);
- break;
- case 0x24CC:
- bufpush(0x24E6);
- break;
- case 0x24CD:
- bufpush(0x24E7);
- break;
- case 0x24CE:
- bufpush(0x24E8);
- break;
- case 0x24CF:
- bufpush(0x24E9);
- break;
- case 0xFB00:
- bufpush(0x0066);
- bufpush(0x0066);
- break;
- case 0xFB01:
- bufpush(0x0066);
- bufpush(0x0069);
- break;
- case 0xFB02:
- bufpush(0x0066);
- bufpush(0x006C);
- break;
- case 0xFB03:
- bufpush(0x0066);
- bufpush(0x0066);
- bufpush(0x0069);
- break;
- case 0xFB04:
- bufpush(0x0066);
- bufpush(0x0066);
- bufpush(0x006C);
- break;
- case 0xFB05:
- bufpush(0x0073);
- bufpush(0x0074);
- break;
- case 0xFB06:
- bufpush(0x0073);
- bufpush(0x0074);
- break;
- case 0xFB13:
- bufpush(0x0574);
- bufpush(0x0576);
- break;
- case 0xFB14:
- bufpush(0x0574);
- bufpush(0x0565);
- break;
- case 0xFB15:
- bufpush(0x0574);
- bufpush(0x056B);
- break;
- case 0xFB16:
- bufpush(0x057E);
- bufpush(0x0576);
- break;
- case 0xFB17:
- bufpush(0x0574);
- bufpush(0x056D);
- break;
- case 0xFF21:
- bufpush(0xFF41);
- break;
- case 0xFF22:
- bufpush(0xFF42);
- break;
- case 0xFF23:
- bufpush(0xFF43);
- break;
- case 0xFF24:
- bufpush(0xFF44);
- break;
- case 0xFF25:
- bufpush(0xFF45);
- break;
- case 0xFF26:
- bufpush(0xFF46);
- break;
- case 0xFF27:
- bufpush(0xFF47);
- break;
- case 0xFF28:
- bufpush(0xFF48);
- break;
- case 0xFF29:
- bufpush(0xFF49);
- break;
- case 0xFF2A:
- bufpush(0xFF4A);
- break;
- case 0xFF2B:
- bufpush(0xFF4B);
- break;
- case 0xFF2C:
- bufpush(0xFF4C);
- break;
- case 0xFF2D:
- bufpush(0xFF4D);
- break;
- case 0xFF2E:
- bufpush(0xFF4E);
- break;
- case 0xFF2F:
- bufpush(0xFF4F);
- break;
- case 0xFF30:
- bufpush(0xFF50);
- break;
- case 0xFF31:
- bufpush(0xFF51);
- break;
- case 0xFF32:
- bufpush(0xFF52);
- break;
- case 0xFF33:
- bufpush(0xFF53);
- break;
- case 0xFF34:
- bufpush(0xFF54);
- break;
- case 0xFF35:
- bufpush(0xFF55);
- break;
- case 0xFF36:
- bufpush(0xFF56);
- break;
- case 0xFF37:
- bufpush(0xFF57);
- break;
- case 0xFF38:
- bufpush(0xFF58);
- break;
- case 0xFF39:
- bufpush(0xFF59);
- break;
- case 0xFF3A:
- bufpush(0xFF5A);
- break;
- case 0x10400:
- bufpush(0x10428);
- break;
- case 0x10401:
- bufpush(0x10429);
- break;
- case 0x10402:
- bufpush(0x1042A);
- break;
- case 0x10403:
- bufpush(0x1042B);
- break;
- case 0x10404:
- bufpush(0x1042C);
- break;
- case 0x10405:
- bufpush(0x1042D);
- break;
- case 0x10406:
- bufpush(0x1042E);
- break;
- case 0x10407:
- bufpush(0x1042F);
- break;
- case 0x10408:
- bufpush(0x10430);
- break;
- case 0x10409:
- bufpush(0x10431);
- break;
- case 0x1040A:
- bufpush(0x10432);
- break;
- case 0x1040B:
- bufpush(0x10433);
- break;
- case 0x1040C:
- bufpush(0x10434);
- break;
- case 0x1040D:
- bufpush(0x10435);
- break;
- case 0x1040E:
- bufpush(0x10436);
- break;
- case 0x1040F:
- bufpush(0x10437);
- break;
- case 0x10410:
- bufpush(0x10438);
- break;
- case 0x10411:
- bufpush(0x10439);
- break;
- case 0x10412:
- bufpush(0x1043A);
- break;
- case 0x10413:
- bufpush(0x1043B);
- break;
- case 0x10414:
- bufpush(0x1043C);
- break;
- case 0x10415:
- bufpush(0x1043D);
- break;
- case 0x10416:
- bufpush(0x1043E);
- break;
- case 0x10417:
- bufpush(0x1043F);
- break;
- case 0x10418:
- bufpush(0x10440);
- break;
- case 0x10419:
- bufpush(0x10441);
- break;
- case 0x1041A:
- bufpush(0x10442);
- break;
- case 0x1041B:
- bufpush(0x10443);
- break;
- case 0x1041C:
- bufpush(0x10444);
- break;
- case 0x1041D:
- bufpush(0x10445);
- break;
- case 0x1041E:
- bufpush(0x10446);
- break;
- case 0x1041F:
- bufpush(0x10447);
- break;
- case 0x10420:
- bufpush(0x10448);
- break;
- case 0x10421:
- bufpush(0x10449);
- break;
- case 0x10422:
- bufpush(0x1044A);
- break;
- case 0x10423:
- bufpush(0x1044B);
- break;
- case 0x10424:
- bufpush(0x1044C);
- break;
- case 0x10425:
- bufpush(0x1044D);
- break;
- }
diff --git a/src/detab.c b/src/detab.c
deleted file mode 100644
index e03fcf7..0000000
--- a/src/detab.c
+++ /dev/null
@@ -1,48 +0,0 @@
-#include "bstrlib.h"
-
-// UTF-8 aware detab: assumes s has no newlines, or only a final newline.
-// Return 0 on success, BSTR_ERR if invalid UTF-8.
-extern int bdetab(bstring s, int utf8)
-{
- unsigned char c;
- int pos = 0; // a count of characters
- int byte = 0; // a count of bytes
- int high_chars_to_skip = 0;
- int numspaces = 0;
- while ((c = bchar(s, byte))) {
- if (utf8 && high_chars_to_skip > 0) {
- if (c >= 0x80) {
- high_chars_to_skip--;
- byte++;
- } else {
- return BSTR_ERR; // invalid utf-8
- }
- } else if (c == '\t') {
- bdelete(s, byte, 1); // delete tab character
- numspaces = 4 - (pos % 4);
- binsertch(s, byte, numspaces, ' ');
- byte += numspaces;
- pos += numspaces;
- } else if (c <= 0x80 || !utf8) {
- byte++;
- pos++;
- } else { // multibyte utf8 sequences
- if (c >> 1 == 0176) {
- high_chars_to_skip = 5;
- } else if (c >> 2 == 076) {
- high_chars_to_skip = 4;
- } else if (c >> 3 == 036) {
- high_chars_to_skip = 3;
- } else if (c >> 4 == 016) {
- high_chars_to_skip = 2;
- } else if (c >> 5 == 06) {
- high_chars_to_skip = 1;
- } else {
- return BSTR_ERR; // invalid utf-8
- }
- pos++;
- byte++;
- }
- }
- return 0;
-}
diff --git a/src/getopt.c b/src/getopt.c
deleted file mode 100644
index 321dd9f..0000000
--- a/src/getopt.c
+++ /dev/null
@@ -1,199 +0,0 @@
-/* $Id: getopt.c 4022 2008-03-31 06:11:07Z rra $
- *
- * Replacement implementation of getopt.
- *
- * This is a replacement implementation for getopt based on the my_getopt
- * distribution by Benjamin Sittler. Only the getopt interface is included,
- * since remctl doesn't use GNU long options, and the code has been rearranged
- * and reworked somewhat to fit with the remctl coding style.
- *
- * Copyright 1997, 2000, 2001, 2002 Benjamin Sittler
- * Copyright 2008 Russ Allbery <rra@stanford.edu>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
- * DEALINGS IN THE SOFTWARE.
- */
-
-#include <config.h>
-#include <portable/system.h>
-#include <portable/getopt.h>
-
-/*
- * If we're running the test suite, rename getopt and the global variables to
- * avoid conflicts with the system version.
- */
-#if TESTING
-# define getopt test_getopt
-int test_getopt(int, char **, const char *);
-# define optind test_optind
-# define opterr test_opterr
-# define optopt test_optopt
-# define optarg test_optarg
-#endif
-
-/* Initialize global interface variables. */
-int optind = 1;
-int opterr = 1;
-int optopt = 0;
-char *optarg = NULL;
-
-/*
- * This is the plain old UNIX getopt, with GNU-style extensions. If you're
- * porting some piece of UNIX software, this is all you need. It supports
- * GNU-style permutation and optional arguments, but does not support the GNU
- * -W extension.
- *
- * This function is not re-entrant or thread-safe, has static variables, and
- * generally isn't a great interface, but normally you only call it once.
- */
-int
-getopt(int argc, char *argv[], const char *optstring)
-{
- const char *p;
- size_t offset = 0;
- char mode = '\0';
- int colon_mode = 0;
- int option = -1;
-
- /* Holds the current position in the parameter being parsed. */
- static int charind = 0;
-
- /*
- * By default, getopt permutes argv as it scans and leaves all non-options
- * at the end. This can be changed with the first character of optstring
- * or the environment variable POSIXLY_CORRECT. With a first character of
- * '+' or when POSIXLY_CORRECT is set, option processing stops at the
- * first non-option. If the first character is '-', each non-option argv
- * element is handled as if it were the argument of an option with
- * character code 1. mode holds this character.
- *
- * After the optional leading '+' and '-', optstring may contain ':'. If
- * present, missing arguments return ':' instead of '?'. colon_mode holds
- * this setting.
- */
- if (getenv("POSIXLY_CORRECT") != NULL) {
- mode = '+';
- colon_mode = '+';
- } else {
- if (optstring[offset] == '+' || optstring[offset] == '-') {
- mode = optstring[offset];
- offset++;
- }
- if (optstring[offset] == ':') {
- colon_mode = 1;
- offset++;
- }
- }
-
- /*
- * charind holds where we left off. If it's set, we were in the middle
- * of an argv element; if not, we pick up with the next element of
- * optind.
- */
- optarg = NULL;
- if (charind == 0) {
- if (optind >= argc)
- option = -1;
- else if (strcmp(argv[optind], "--") == 0) {
- optind++;
- option = -1;
- } else if (argv[optind][0] != '-' || argv[optind][1] == '\0') {
- char *tmp;
- int i, j, k, end;
-
- if (mode == '+')
- option = -1;
- else if (mode == '-') {
- optarg = argv[optind];
- optind++;
- option = 1;
- } else {
- for (i = optind + 1, j = optind; i < argc; i++)
- if ((argv[i][0] == '-') && (argv[i][1] != '\0')) {
- optind = i;
- option = getopt(argc, argv, optstring);
- while (i > j) {
- --i;
- tmp = argv[i];
- end = (charind == 0) ? optind - 1 : optind;
- for (k = i; k + 1 <= end; k++) {
- argv[k] = argv[k + 1];
- }
- argv[end] = tmp;
- --optind;
- }
- break;
- }
- if (i == argc)
- option = -1;
- }
- return option;
- } else {
- charind = 1;
- }
- }
- if (charind != 0) {
- optopt = argv[optind][charind];
- for (p = optstring + offset; *p != '\0'; p++)
- if (optopt == *p) {
- p++;
- if (*p == ':') {
- if (argv[optind][charind + 1] != '\0') {
- optarg = &argv[optind][charind + 1];
- optind++;
- charind = 0;
- } else {
- p++;
- if (*p != ':') {
- charind = 0;
- optind++;
- if (optind >= argc) {
- if (opterr)
- fprintf(stderr, "%s: option requires"
- " an argument -- %c\n", argv[0],
- optopt);
- option = colon_mode ? ':' : '?';
- goto done;
- } else {
- optarg = argv[optind];
- optind++;
- }
- }
- }
- }
- option = optopt;
- }
- if (option == -1) {
- if (opterr)
- fprintf(stderr, "%s: illegal option -- %c\n", argv[0], optopt);
- option = '?';
- }
- }
-
-done:
- if (charind != 0) {
- charind++;
- if (argv[optind][charind] == '\0') {
- optind++;
- charind = 0;
- }
- }
- if (optind > argc)
- optind = argc;
- return option;
-}
diff --git a/src/inlines.c b/src/inlines.c
index f75c846..4ff45ad 100644
--- a/src/inlines.c
+++ b/src/inlines.c
@@ -2,133 +2,154 @@
#include <stdio.h>
#include <stdbool.h>
#include <ctype.h>
-#include "bstrlib.h"
+#include <string.h>
+
#include "stmd.h"
#include "uthash.h"
#include "debug.h"
#include "scanners.h"
#include "utf8.h"
+typedef struct Subject {
+ const gh_buf *buffer;
+ int pos;
+ reference** reference_map;
+ int label_nestlevel;
+} subject;
+
+reference* lookup_reference(reference** refmap, chunk *label);
+reference* make_reference(chunk *label, chunk *url, chunk *title);
+
+static unsigned char *clean_url(chunk *url);
+static unsigned char *clean_title(chunk *title);
+
+inline static unsigned char *chunk_to_cstr(chunk *c);
+inline static void chunk_free(chunk *c);
+inline static void chunk_trim(chunk *c);
+
+inline static chunk chunk_literal(const char *data);
+inline static chunk chunk_buf_detach(gh_buf *buf);
+inline static chunk chunk_buf(const gh_buf *buf, int pos, int len);
+
+static inl *parse_chunk_inlines(chunk *chunk, reference** refmap);
+static inl *parse_inlines_while(subject* subj, int (*f)(subject*));
+static int parse_inline(subject* subj, inl ** last);
+
extern void free_reference(reference *ref) {
- bdestroy(ref->label);
- bdestroy(ref->url);
- bdestroy(ref->title);
- free(ref);
+ free(ref->label);
+ free(ref->url);
+ free(ref->title);
+ free(ref);
}
extern void free_reference_map(reference **refmap) {
- /* free the hash table contents */
- reference *s;
- reference *tmp;
- if (refmap != NULL) {
- HASH_ITER(hh, *refmap, s, tmp) {
- HASH_DEL(*refmap, s);
- free_reference(s);
- }
- free(refmap);
- }
+ /* free the hash table contents */
+ reference *s;
+ reference *tmp;
+ if (refmap != NULL) {
+ HASH_ITER(hh, *refmap, s, tmp) {
+ HASH_DEL(*refmap, s);
+ free_reference(s);
+ }
+ free(refmap);
+ }
}
// normalize reference: collapse internal whitespace to single space,
// remove leading/trailing whitespace, case fold
-static bstring normalize_reference(bstring s)
-{
- bstring normalized = case_fold(s);
- int pos = 0;
- int startpos;
- char c;
- while ((c = bchar(normalized, pos))) {
- if (isspace(c)) {
- startpos = pos;
- // skip til next non-space
- pos++;
- while (isspace(bchar(s, pos))) {
- pos++;
- }
- bdelete(normalized, startpos, pos - startpos);
- binsertch(normalized, startpos, 1, ' ');
- pos = startpos + 1;
- }
- pos++;
- }
- btrimws(normalized);
- return normalized;
+static unsigned char *normalize_reference(chunk *ref)
+{
+ gh_buf normalized = GH_BUF_INIT;
+ int r, w;
+
+ utf8proc_case_fold(&normalized, ref->data, ref->len);
+ gh_buf_trim(&normalized);
+
+ for (r = 0, w = 0; r < normalized.size; ++r) {
+ if (r && gh_buf_at(&normalized, r - 1) == ' ') {
+ while (gh_buf_at(&normalized, r) == ' ')
+ r++;
+ }
+
+ normalized.ptr[w++] = normalized.ptr[r];
+ }
+
+ return gh_buf_detach(&normalized);
}
// Returns reference if refmap contains a reference with matching
// label, otherwise NULL.
-extern reference* lookup_reference(reference** refmap, bstring lab)
+extern reference* lookup_reference(reference** refmap, chunk *label)
{
- reference * ref = NULL;
- bstring label = normalize_reference(lab);
- if (refmap != NULL) {
- HASH_FIND_STR(*refmap, (char*) label->data, ref);
- }
- bdestroy(label);
- return ref;
+ reference *ref = NULL;
+ unsigned char *norm = normalize_reference(label);
+ if (refmap != NULL) {
+ HASH_FIND_STR(*refmap, (char*)norm, ref);
+ }
+ free(label);
+ return ref;
}
-extern reference* make_reference(bstring label, bstring url, bstring title)
+extern reference* make_reference(chunk *label, chunk *url, chunk *title)
{
- reference * ref;
- ref = malloc(sizeof(reference));
- ref->label = normalize_reference(label);
- ref->url = bstrcpy(url);
- ref->title = bstrcpy(title);
- return ref;
+ reference *ref;
+ ref = malloc(sizeof(reference));
+ ref->label = normalize_reference(label);
+ ref->url = clean_url(url);
+ ref->title = clean_title(title);
+ return ref;
}
extern void add_reference(reference** refmap, reference* ref)
{
- reference * t = NULL;
- HASH_FIND(hh, *refmap, (char*) ref->label->data,
- (unsigned) blength(ref->label), t);
- if (t == NULL) {
- HASH_ADD_KEYPTR(hh, *refmap, (char*) ref->label->data,
- (unsigned) blength(ref->label), ref);
- } else {
- free_reference(ref); // we free this now since it won't be in the refmap
- }
+ reference * t = NULL;
+ HASH_FIND(hh, *refmap, (char*)ref->label, (unsigned)strlen(ref->label), t);
+
+ if (t == NULL) {
+ HASH_ADD_KEYPTR(hh, *refmap, (char*)ref->label, (unsigned)strlen(ref->label), ref);
+ } else {
+ free_reference(ref); // we free this now since it won't be in the refmap
+ }
}
// Create an inline with a linkable string value.
-inline static inl* make_linkable(int t, inl* label, bstring url, bstring title)
+inline static inl* make_linkable(int t, inl* label, chunk url, chunk title)
{
- inl* e = (inl*) malloc(sizeof(inl));
- e->tag = t;
- e->content.linkable.label = label;
- e->content.linkable.url = url;
- e->content.linkable.title = title;
- e->next = NULL;
- return e;
+ inl* e = (inl*) malloc(sizeof(inl));
+ e->tag = t;
+ e->content.linkable.label = label;
+ e->content.linkable.url = chunk_to_cstr(&url);
+ e->content.linkable.title = chunk_to_cstr(&title);
+ e->next = NULL;
+ return e;
}
inline static inl* make_inlines(int t, inl* contents)
{
- inl* e = (inl*) malloc(sizeof(inl));
- e->tag = t;
- e->content.inlines = contents;
- e->next = NULL;
- return e;
+ inl* e = (inl*) malloc(sizeof(inl));
+ e->tag = t;
+ e->content.inlines = contents;
+ e->next = NULL;
+ return e;
}
// Create an inline with a literal string value.
-inline static inl* make_literal(int t, bstring s)
+inline static inl* make_literal(int t, chunk s)
{
- inl* e = (inl*) malloc(sizeof(inl));
- e->tag = t;
- e->content.literal = s;
- e->next = NULL;
- return e;
+ inl* e = (inl*) malloc(sizeof(inl));
+ e->tag = t;
+ e->content.literal = s;
+ e->next = NULL;
+ return e;
}
// Create an inline with no value.
inline static inl* make_simple(int t)
{
- inl* e = (inl*) malloc(sizeof(inl));
- e->tag = t;
- e->next = NULL;
- return e;
+ inl* e = (inl*) malloc(sizeof(inl));
+ e->tag = t;
+ e->next = NULL;
+ return e;
}
// Macros for creating various kinds of inlines.
@@ -139,113 +160,157 @@ inline static inl* make_simple(int t)
#define make_linebreak() make_simple(linebreak)
#define make_softbreak() make_simple(softbreak)
#define make_link(label, url, title) make_linkable(link, label, url, title)
-#define make_image(alt, url, title) make_linkable(image, alt, url, title)
#define make_emph(contents) make_inlines(emph, contents)
#define make_strong(contents) make_inlines(strong, contents)
// Free an inline list.
extern void free_inlines(inl* e)
{
- inl * next;
- while (e != NULL) {
- switch (e->tag){
- case str:
- case raw_html:
- case code:
- case entity:
- bdestroy(e->content.literal);
- break;
- case linebreak:
- case softbreak:
- break;
- case link:
- case image:
- bdestroy(e->content.linkable.url);
- bdestroy(e->content.linkable.title);
- free_inlines(e->content.linkable.label);
- break;
- case emph:
- case strong:
- free_inlines(e->content.inlines);
- break;
- default:
- break;
- }
- next = e->next;
- free(e);
- e = next;
- }
+ inl * next;
+ while (e != NULL) {
+ switch (e->tag){
+ case str:
+ case raw_html:
+ case code:
+ case entity:
+ chunk_free(&e->content.literal);
+ break;
+ case linebreak:
+ case softbreak:
+ break;
+ case link:
+ case image:
+ free(e->content.linkable.url);
+ free(e->content.linkable.title);
+ free_inlines(e->content.linkable.label);
+ break;
+ case emph:
+ case strong:
+ free_inlines(e->content.inlines);
+ break;
+ default:
+ break;
+ }
+ next = e->next;
+ free(e);
+ e = next;
+ }
}
// Append inline list b to the end of inline list a.
// Return pointer to head of new list.
inline static inl* append_inlines(inl* a, inl* b)
{
- if (a == NULL) { // NULL acts like an empty list
- return b;
- }
- inl* cur = a;
- while (cur->next) {
- cur = cur->next;
- }
- cur->next = b;
- return a;
+ if (a == NULL) { // NULL acts like an empty list
+ return b;
+ }
+ inl* cur = a;
+ while (cur->next) {
+ cur = cur->next;
+ }
+ cur->next = b;
+ return a;
}
// Make a 'subject' from an input string.
-static subject* make_subject(bstring s, reference** refmap)
+static void init_subject(subject *e, gh_buf *buffer, int input_pos, reference** refmap)
{
- subject* e = (subject*) malloc(sizeof(subject));
- // remove final whitespace
- brtrimws(s);
- e->buffer = s;
- e->pos = 0;
- e->label_nestlevel = 0;
- e->reference_map = refmap;
- return e;
+ e->buffer = buffer;
+ e->pos = input_pos;
+ e->label_nestlevel = 0;
+ e->reference_map = refmap;
}
inline static int isbacktick(int c)
{
- return (c == '`');
+ return (c == '`');
+}
+
+inline static void chunk_free(chunk *c)
+{
+ if (c->alloc)
+ free((char *)c->data);
+
+ c->data = NULL;
+ c->alloc = 0;
+ c->len = 0;
+}
+
+inline static void chunk_trim(chunk *c)
+{
+ while (c->len && isspace(c->data[0])) {
+ c->data++;
+ c->len--;
+ }
+
+ while (c->len > 0) {
+ if (!isspace(c->data[c->len - 1]))
+ break;
+
+ c->len--;
+ }
+}
+
+inline static unsigned char *chunk_to_cstr(chunk *c)
+{
+ unsigned char *str;
+
+ str = malloc(c->len + 1);
+ memcpy(str, c->data, c->len);
+ str[c->len] = 0;
+
+ return str;
+}
+
+inline static chunk chunk_literal(const char *data)
+{
+ chunk c = {data, strlen(data), 0};
+ return c;
+}
+
+inline static chunk chunk_buf(const gh_buf *buf, int pos, int len)
+{
+ chunk c = {buf->ptr + pos, len, 0};
+ return c;
+}
+
+inline static chunk chunk_buf_detach(gh_buf *buf)
+{
+ chunk c;
+
+ c.len = buf->size;
+ c.data = gh_buf_detach(buf);
+ c.alloc = 1;
+
+ return c;
}
// Return the next character in the subject, without advancing.
// Return 0 if at the end of the subject.
-#define peek_char(subj) bchar(subj->buffer, subj->pos)
+#define peek_char(subj) gh_buf_at((subj)->buffer, (subj)->pos)
// Return true if there are more characters in the subject.
inline static int is_eof(subject* subj)
{
- return (subj->pos >= blength(subj->buffer));
+ return (subj->pos >= gh_buf_len(subj->buffer));
}
// Advance the subject. Doesn't check for eof.
-#define advance(subj) subj->pos += 1
+#define advance(subj) (subj)->pos += 1
// Take characters while a predicate holds, and return a string.
-inline static bstring take_while(subject* subj, int (*f)(int))
+inline static chunk take_while(subject* subj, int (*f)(int))
{
- unsigned char c;
- int startpos = subj->pos;
- int len = 0;
- while ((c = peek_char(subj)) && (*f)(c)) {
- advance(subj);
- len++;
- }
- return bmidstr(subj->buffer, startpos, len);
-}
+ unsigned char c;
+ int startpos = subj->pos;
+ int len = 0;
-// Take one character and return a string, or NULL if eof.
-inline static bstring take_one(subject* subj)
-{
- int startpos = subj->pos;
- if (is_eof(subj)){
- return NULL;
- } else {
- advance(subj);
- return bmidstr(subj->buffer, startpos, 1);
- }
+ while ((c = peek_char(subj)) && (*f)(c)) {
+ advance(subj);
+ len++;
+ }
+
+ return chunk_buf(subj->buffer, startpos, len);
}
// Try to process a backtick code span that began with a
@@ -255,381 +320,406 @@ inline static bstring take_one(subject* subj)
// after the closing backticks.
static int scan_to_closing_backticks(subject* subj, int openticklength)
{
- // read non backticks
- char c;
- while ((c = peek_char(subj)) && c != '`') {
- advance(subj);
- }
- if (is_eof(subj)) {
- return 0; // did not find closing ticks, return 0
- }
- int numticks = 0;
- while (peek_char(subj) == '`') {
- advance(subj);
- numticks++;
- }
- if (numticks != openticklength){
- return(scan_to_closing_backticks(subj, openticklength));
- }
- return (subj->pos);
-}
-
-// Destructively modify bstring, collapsing consecutive
+ // read non backticks
+ char c;
+ while ((c = peek_char(subj)) && c != '`') {
+ advance(subj);
+ }
+ if (is_eof(subj)) {
+ return 0; // did not find closing ticks, return 0
+ }
+ int numticks = 0;
+ while (peek_char(subj) == '`') {
+ advance(subj);
+ numticks++;
+ }
+ if (numticks != openticklength){
+ return(scan_to_closing_backticks(subj, openticklength));
+ }
+ return (subj->pos);
+}
+
+// Destructively modify string, collapsing consecutive
// space and newline characters into a single space.
-static int normalize_whitespace(bstring s)
-{
- bool last_char_was_space = false;
- int pos = 0;
- char c;
- while ((c = bchar(s, pos))) {
- switch (c) {
- case ' ':
- if (last_char_was_space) {
- bdelete(s, pos, 1);
- } else {
- pos++;
- }
- last_char_was_space = true;
- break;
- case '\n':
- if (last_char_was_space) {
- bdelete(s, pos, 1);
- } else {
- bdelete(s, pos, 1);
- binsertch(s, pos, 1, ' ');
- pos++;
- }
- last_char_was_space = true;
- break;
- default:
- pos++;
- last_char_was_space = false;
- }
- }
- return 0;
+static void normalize_whitespace(gh_buf *s)
+{
+ /* TODO */
+#if 0
+ bool last_char_was_space = false;
+ int pos = 0;
+ char c;
+ while ((c = gh_buf_at(s, pos))) {
+ switch (c) {
+ case ' ':
+ if (last_char_was_space) {
+ bdelete(s, pos, 1);
+ } else {
+ pos++;
+ }
+ last_char_was_space = true;
+ break;
+ case '\n':
+ if (last_char_was_space) {
+ bdelete(s, pos, 1);
+ } else {
+ bdelete(s, pos, 1);
+ binsertch(s, pos, 1, ' ');
+ pos++;
+ }
+ last_char_was_space = true;
+ break;
+ default:
+ pos++;
+ last_char_was_space = false;
+ }
+ }
+#endif
}
// Parse backtick code section or raw backticks, return an inline.
// Assumes that the subject has a backtick at the current position.
static inl* handle_backticks(subject *subj)
{
- bstring openticks = take_while(subj, isbacktick);
- bstring result;
- int ticklength = blength(openticks);
- int startpos = subj->pos;
- int endpos = scan_to_closing_backticks(subj, ticklength);
- if (endpos == 0) { // not found
- subj->pos = startpos; // rewind
- return make_str(openticks);
- } else {
- bdestroy(openticks);
- result = bmidstr(subj->buffer, startpos, endpos - startpos - ticklength);
- btrimws(result);
- normalize_whitespace(result);
- return make_code(result);
- }
+ chunk openticks = take_while(subj, isbacktick);
+ int startpos = subj->pos;
+ int endpos = scan_to_closing_backticks(subj, openticks.len);
+
+ if (endpos == 0) { // not found
+ subj->pos = startpos; // rewind
+ return make_str(openticks);
+ } else {
+ gh_buf buf = GH_BUF_INIT;
+
+ gh_buf_set(&buf, subj->buffer->ptr + startpos, endpos - startpos - openticks.len);
+ gh_buf_trim(&buf);
+ normalize_whitespace(&buf);
+
+ return make_code(chunk_buf_detach(&buf));
+ }
}
// Scan ***, **, or * and return number scanned, or 0.
// Don't advance position.
static int scan_delims(subject* subj, char c, bool * can_open, bool * can_close)
{
- int numdelims = 0;
- char char_before, char_after;
- int startpos = subj->pos;
-
- char_before = subj->pos == 0 ? '\n' : bchar(subj->buffer, subj->pos - 1);
- while (peek_char(subj) == c) {
- numdelims++;
- advance(subj);
- }
- char_after = peek_char(subj);
- *can_open = numdelims > 0 && numdelims <= 3 && !isspace(char_after);
- *can_close = numdelims > 0 && numdelims <= 3 && !isspace(char_before);
- if (c == '_') {
- *can_open = *can_open && !isalnum(char_before);
- *can_close = *can_close && !isalnum(char_after);
- }
- subj->pos = startpos;
- return numdelims;
+ int numdelims = 0;
+ char char_before, char_after;
+ int startpos = subj->pos;
+
+ char_before = subj->pos == 0 ? '\n' : gh_buf_at(subj->buffer, subj->pos - 1);
+ while (peek_char(subj) == c) {
+ numdelims++;
+ advance(subj);
+ }
+ char_after = peek_char(subj);
+ *can_open = numdelims > 0 && numdelims <= 3 && !isspace(char_after);
+ *can_close = numdelims > 0 && numdelims <= 3 && !isspace(char_before);
+ if (c == '_') {
+ *can_open = *can_open && !isalnum(char_before);
+ *can_close = *can_close && !isalnum(char_after);
+ }
+ subj->pos = startpos;
+ return numdelims;
}
// Parse strong/emph or a fallback.
// Assumes the subject has '_' or '*' at the current position.
static inl* handle_strong_emph(subject* subj, char c)
{
- bool can_open, can_close;
- inl * result = NULL;
- inl ** last = malloc(sizeof(inl *));
- inl * new;
- inl * il;
- inl * first_head = NULL;
- inl * first_close = NULL;
- int first_close_delims = 0;
- int numdelims;
-
- *last = NULL;
-
- numdelims = scan_delims(subj, c, &can_open, &can_close);
- subj->pos += numdelims;
-
- new = make_str(bmidstr(subj->buffer, subj->pos - numdelims, numdelims));
- *last = new;
- first_head = new;
- result = new;
-
- if (!can_open || numdelims == 0) {
- goto done;
- }
-
- switch (numdelims) {
- case 1:
- while (true) {
- numdelims = scan_delims(subj, c, &can_open, &can_close);
- if (numdelims >= 1 && can_close) {
- subj->pos += 1;
- first_head->tag = emph;
- bdestroy(first_head->content.literal);
- first_head->content.inlines = first_head->next;
- first_head->next = NULL;
- goto done;
- } else {
- if (!parse_inline(subj, last)) {
- goto done;
- }
- }
- }
- break;
- case 2:
- while (true) {
- numdelims = scan_delims(subj, c, &can_open, &can_close);
- if (numdelims >= 2 && can_close) {
- subj->pos += 2;
- first_head->tag = strong;
- bdestroy(first_head->content.literal);
- first_head->content.inlines = first_head->next;
- first_head->next = NULL;
- goto done;
- } else {
- if (!parse_inline(subj, last)) {
- goto done;
- }
- }
- }
- break;
- case 3:
- while (true) {
- numdelims = scan_delims(subj, c, &can_open, &can_close);
- if (can_close && numdelims >= 1 && numdelims <= 3 &&
- numdelims != first_close_delims) {
- new = make_str(bmidstr(subj->buffer, subj->pos, numdelims));
- append_inlines(*last, new);
- *last = new;
-
- if (first_close_delims == 1 && numdelims > 2) {
- numdelims = 2;
- } else if (first_close_delims == 2) {
- numdelims = 1;
- } else if (numdelims == 3) {
- // If we opened with ***, we interpret it as ** followed by *
- // giving us <strong><em>
- numdelims = 1;
- }
-
- subj->pos += numdelims;
- if (first_close) {
- first_head->tag = first_close_delims == 1 ? strong : emph;
- bdestroy(first_head->content.literal);
- first_head->content.inlines =
- make_inlines(first_close_delims == 1 ? emph : strong,
- first_head->next);
-
- il = first_head->next;
- while (il->next && il->next != first_close) {
- il = il->next;
- }
- il->next = NULL;
-
- first_head->content.inlines->next = first_close->next;
-
- il = first_head->content.inlines;
- while (il->next && il->next != *last) {
- il = il->next;
- }
- il->next = NULL;
- free_inlines(*last);
-
- first_close->next = NULL;
- free_inlines(first_close);
- first_head->next = NULL;
- goto done;
- } else {
- first_close = *last;
- first_close_delims = numdelims;
- }
- } else {
- if (!parse_inline(subj, last)) {
- goto done;
- }
- }
- }
- break;
- default:
- goto done;
- }
-
- done:
- free(last);
- return result;
+ bool can_open, can_close;
+ inl * result = NULL;
+ inl ** last = malloc(sizeof(inl *));
+ inl * new;
+ inl * il;
+ inl * first_head = NULL;
+ inl * first_close = NULL;
+ int first_close_delims = 0;
+ int numdelims;
+
+ *last = NULL;
+
+ numdelims = scan_delims(subj, c, &can_open, &can_close);
+ subj->pos += numdelims;
+
+ new = make_str(chunk_buf(subj->buffer, subj->pos - numdelims, numdelims));
+ *last = new;
+ first_head = new;
+ result = new;
+
+ if (!can_open || numdelims == 0) {
+ goto done;
+ }
+
+ switch (numdelims) {
+ case 1:
+ while (true) {
+ numdelims = scan_delims(subj, c, &can_open, &can_close);
+ if (numdelims >= 1 && can_close) {
+ subj->pos += 1;
+ first_head->tag = emph;
+ chunk_free(&first_head->content.literal);
+ first_head->content.inlines = first_head->next;
+ first_head->next = NULL;
+ goto done;
+ } else {
+ if (!parse_inline(subj, last)) {
+ goto done;
+ }
+ }
+ }
+ break;
+ case 2:
+ while (true) {
+ numdelims = scan_delims(subj, c, &can_open, &can_close);
+ if (numdelims >= 2 && can_close) {
+ subj->pos += 2;
+ first_head->tag = strong;
+ chunk_free(&first_head->content.literal);
+ first_head->content.inlines = first_head->next;
+ first_head->next = NULL;
+ goto done;
+ } else {
+ if (!parse_inline(subj, last)) {
+ goto done;
+ }
+ }
+ }
+ break;
+ case 3:
+ while (true) {
+ numdelims = scan_delims(subj, c, &can_open, &can_close);
+ if (can_close && numdelims >= 1 && numdelims <= 3 &&
+ numdelims != first_close_delims) {
+ new = make_str(chunk_buf(subj->buffer, subj->pos, numdelims));
+ append_inlines(*last, new);
+ *last = new;
+ if (first_close_delims == 1 && numdelims > 2) {
+ numdelims = 2;
+ } else if (first_close_delims == 2) {
+ numdelims = 1;
+ } else if (numdelims == 3) {
+ // If we opened with ***, we interpret it as ** followed by *
+ // giving us <strong><em>
+ numdelims = 1;
+ }
+ subj->pos += numdelims;
+ if (first_close) {
+ first_head->tag = first_close_delims == 1 ? strong : emph;
+ chunk_free(&first_head->content.literal);
+ first_head->content.inlines =
+ make_inlines(first_close_delims == 1 ? emph : strong,
+ first_head->next);
+
+ il = first_head->next;
+ while (il->next && il->next != first_close) {
+ il = il->next;
+ }
+ il->next = NULL;
+
+ first_head->content.inlines->next = first_close->next;
+
+ il = first_head->content.inlines;
+ while (il->next && il->next != *last) {
+ il = il->next;
+ }
+ il->next = NULL;
+ free_inlines(*last);
+
+ first_close->next = NULL;
+ free_inlines(first_close);
+ first_head->next = NULL;
+ goto done;
+ } else {
+ first_close = *last;
+ first_close_delims = numdelims;
+ }
+ } else {
+ if (!parse_inline(subj, last)) {
+ goto done;
+ }
+ }
+ }
+ break;
+ default:
+ goto done;
+ }
+
+done:
+ free(last);
+ return result;
}
// Parse backslash-escape or just a backslash, returning an inline.
static inl* handle_backslash(subject *subj)
{
- advance(subj);
- unsigned char nextchar = peek_char(subj);
- if (ispunct(nextchar)) { // only ascii symbols and newline can be escaped
- advance(subj);
- return make_str(bformat("%c", nextchar));
- } else if (nextchar == '\n') {
- advance(subj);
- return make_linebreak();
- } else {
- return make_str(bfromcstr("\\"));
- }
+ advance(subj);
+ unsigned char nextchar = peek_char(subj);
+ if (ispunct(nextchar)) { // only ascii symbols and newline can be escaped
+ advance(subj);
+ return make_str(chunk_buf(subj->buffer, subj->pos - 1, 1));
+ } else if (nextchar == '\n') {
+ advance(subj);
+ return make_linebreak();
+ } else {
+ return make_str(chunk_literal("\\"));
+ }
}
// Parse an entity or a regular "&" string.
// Assumes the subject has an '&' character at the current position.
static inl* handle_entity(subject* subj)
{
- int match;
- inl * result;
- match = scan_entity(subj->buffer, subj->pos);
- if (match) {
- result = make_entity(bmidstr(subj->buffer, subj->pos, match));
- subj->pos += match;
- } else {
- advance(subj);
- result = make_str(bfromcstr("&"));
- }
- return result;
+ int match;
+ inl *result;
+ match = scan_entity(subj->buffer, subj->pos);
+ if (match) {
+ result = make_entity(chunk_buf(subj->buffer, subj->pos, match));
+ subj->pos += match;
+ } else {
+ advance(subj);
+ result = make_str(chunk_literal("&"));
+ }
+ return result;
}
// Like make_str, but parses entities.
// Returns an inline sequence consisting of str and entity elements.
-static inl * make_str_with_entities(bstring s)
-{
- inl * result = NULL;
- inl * new;
- int searchpos;
- char c;
- subject * subj = make_subject(s, NULL);
-
- while ((c = peek_char(subj))) {
- switch (c) {
- case '&':
- new = handle_entity(subj);
- break;
- default:
- searchpos = bstrchrp(subj->buffer, '&', subj->pos);
- if (searchpos == BSTR_ERR) {
- searchpos = blength(subj->buffer);
- }
- new = make_str(bmidstr(subj->buffer, subj->pos, searchpos - subj->pos));
- subj->pos = searchpos;
- }
- result = append_inlines(result, new);
- }
- free(subj);
- return result;
+static inl *make_str_with_entities(chunk *content)
+{
+ inl * result = NULL;
+ inl * new;
+ int searchpos;
+ char c;
+ subject subj;
+ gh_buf content_buf = GH_BUF_INIT;
+
+ gh_buf_set(&content_buf, content->data, content->len);
+ init_subject(&subj, &content_buf, 0, NULL);
+
+ while ((c = peek_char(&subj))) {
+ switch (c) {
+ case '&':
+ new = handle_entity(&subj);
+ break;
+ default:
+ searchpos = gh_buf_strchr(subj.buffer, '&', subj.pos);
+ if (searchpos < 0) {
+ searchpos = gh_buf_len(subj.buffer);
+ }
+
+ new = make_str(chunk_buf(subj.buffer, subj.pos, searchpos - subj.pos));
+ subj.pos = searchpos;
+ }
+ result = append_inlines(result, new);
+ }
+
+ gh_buf_free(&content_buf);
+ return result;
}
// Destructively unescape a string: remove backslashes before punctuation chars.
-extern int unescape(bstring url)
+extern void unescape_buffer(gh_buf *buf)
{
- // remove backslashes before punctuation chars:
- int searchpos = 0;
- while ((searchpos = bstrchrp(url, '\\', searchpos)) != BSTR_ERR) {
- if (ispunct(bchar(url, searchpos + 1))) {
- bdelete(url, searchpos, 1);
- } else {
- searchpos++;
- }
- }
- return 0;
+ int r, w;
+
+ for (r = 0, w = 0; r < buf->size; ++r) {
+ if (buf->ptr[r] == '\\' && ispunct(buf->ptr[r + 1]))
+ continue;
+
+ buf->ptr[w++] = buf->ptr[r];
+ }
+
+ gh_buf_truncate(buf, w);
}
// Clean a URL: remove surrounding whitespace and surrounding <>,
// and remove \ that escape punctuation.
-static int clean_url(bstring url)
+static unsigned char *clean_url(chunk *url)
{
- // remove surrounding <> if any:
- int urllength = blength(url);
- btrimws(url);
- if (bchar(url, 0) == '<' && bchar(url, urllength - 1) == '>') {
- bdelete(url, 0, 1);
- bdelete(url, urllength - 2, 1);
- }
- unescape(url);
- return 0;
+ gh_buf buf = GH_BUF_INIT;
+
+ chunk_trim(url);
+
+ if (url->data[0] == '<' && url->data[url->len - 1] == '>') {
+ gh_buf_set(&buf, url->data + 1, url->len - 2);
+ } else {
+ gh_buf_set(&buf, url->data, url->len);
+ }
+
+ unescape_buffer(&buf);
+ return gh_buf_detach(&buf);
}
// Clean a title: remove surrounding quotes and remove \ that escape punctuation.
-static int clean_title(bstring title)
+static unsigned char *clean_title(chunk *title)
{
- // remove surrounding quotes if any:
- int titlelength = blength(title);
- if ((bchar(title, 0) == '\'' && bchar(title, titlelength - 1) == '\'') ||
- (bchar(title, 0) == '(' && bchar(title, titlelength - 1) == ')') ||
- (bchar(title, 0) == '"' && bchar(title, titlelength - 1) == '"')) {
- bdelete(title, 0, 1);
- bdelete(title, titlelength - 2, 1);
- }
- unescape(title);
- return 0;
+ gh_buf buf = GH_BUF_INIT;
+ unsigned char first = title->data[0];
+ unsigned char last = title->data[title->len - 1];
+
+ // remove surrounding quotes if any:
+ if ((first == '\'' && last == '\'') ||
+ (first == '(' && last == ')') ||
+ (first == '"' && last == '"')) {
+ gh_buf_set(&buf, title->data + 1, title->len - 2);
+ } else {
+ gh_buf_set(&buf, title->data, title->len);
+ }
+
+ unescape_buffer(&buf);
+ return gh_buf_detach(&buf);
}
// Parse an autolink or HTML tag.
// Assumes the subject has a '<' character at the current position.
static inl* handle_pointy_brace(subject* subj)
{
- int matchlen = 0;
- bstring contents;
- inl* result;
-
- advance(subj); // advance past first <
- // first try to match a URL autolink
- matchlen = scan_autolink_uri(subj->buffer, subj->pos);
- if (matchlen > 0) {
- contents = bmidstr(subj->buffer, subj->pos, matchlen - 1);
- subj->pos += matchlen;
- result = make_link(make_str_with_entities(contents),
- bstrcpy(contents), bfromcstr(""));
- bdestroy(contents);
- return result;
- }
- // next try to match an email autolink
- matchlen = scan_autolink_email(subj->buffer, subj->pos);
- if (matchlen > 0) {
- contents = bmidstr(subj->buffer, subj->pos, matchlen - 1);
- subj->pos += matchlen;
- result = make_link(make_str_with_entities(contents),
- bformat("mailto:%s", contents->data),
- bfromcstr(""));
- bdestroy(contents);
- return result;
- }
- // finally, try to match an html tag
- matchlen = scan_html_tag(subj->buffer, subj->pos);
- if (matchlen > 0) {
- contents = bmidstr(subj->buffer, subj->pos, matchlen);
- binsertch(contents, 0, 1, '<');
- subj->pos += matchlen;
- return make_raw_html(contents);
- } else {// if nothing matches, just return the opening <:
- return make_str(bfromcstr("<"));
- }
+ // The three scanners are tried in order (URI autolink, email autolink,
+ // HTML tag); the first positive match length wins.
+ int matchlen = 0;
+ chunk contents;
+
+ advance(subj); // advance past first <
+
+ // first try to match a URL autolink
+ matchlen = scan_autolink_uri(subj->buffer, subj->pos);
+ if (matchlen > 0) {
+ // matchlen - 1: the last matched byte (presumably the closing '>')
+ // is left out of the link text but is still skipped below.
+ contents = chunk_buf(subj->buffer, subj->pos, matchlen - 1);
+ subj->pos += matchlen;
+
+ // the same chunk serves as both the link label and the link URL
+ return make_link(
+ make_str_with_entities(&contents),
+ contents,
+ chunk_literal("")
+ );
+ }
+
+ // next try to match an email autolink
+ matchlen = scan_autolink_email(subj->buffer, subj->pos);
+ if (matchlen > 0) {
+ gh_buf mail_url = GH_BUF_INIT;
+
+ contents = chunk_buf(subj->buffer, subj->pos, matchlen - 1);
+ subj->pos += matchlen;
+
+ // the URL is the address prefixed with "mailto:", built in its own buffer
+ gh_buf_puts(&mail_url, "mailto:");
+ gh_buf_put(&mail_url, contents.data, contents.len);
+
+ return make_link(
+ make_str_with_entities(&contents),
+ chunk_buf_detach(&mail_url),
+ chunk_literal("")
+ );
+ }
+
+ // finally, try to match an html tag
+ matchlen = scan_html_tag(subj->buffer, subj->pos);
+ if (matchlen > 0) {
+ // start one byte earlier and take one byte more so the chunk
+ // includes the '<' that advance() already consumed
+ contents = chunk_buf(subj->buffer, subj->pos - 1, matchlen + 1);
+ subj->pos += matchlen;
+ return make_raw_html(contents);
+ }
+
+ // if nothing matches, just return the opening <:
+ return make_str(chunk_literal("<"));
}
// Parse a link label. Returns 1 if successful.
@@ -641,366 +731,381 @@ static inl* handle_pointy_brace(subject* subj)
// markers. So, 2 below contains a link while 1 does not:
// 1. [a link `with a ](/url)` character
// 2. [a link *with emphasized ](/url) text*
-static int link_label(subject* subj, bstring* raw_label)
-{
- int nestlevel = 0;
- inl* tmp = NULL;
- bstring raw;
- int startpos = subj->pos;
- if (subj->label_nestlevel) {
- // if we've already checked to the end of the subject
- // for a label, even with a different starting [, we
- // know we won't find one here and we can just return.
- // Note: nestlevel 1 would be: [foo [bar]
- // nestlevel 2 would be: [foo [bar [baz]
- subj->label_nestlevel--;
- return 0;
- }
- advance(subj); // advance past [
- char c;
- while ((c = peek_char(subj)) && (c != ']' || nestlevel > 0)) {
- switch (c) {
- case '`':
- tmp = handle_backticks(subj);
- free_inlines(tmp);
- break;
- case '<':
- tmp = handle_pointy_brace(subj);
- free_inlines(tmp);
- break;
- case '[': // nested []
- nestlevel++;
- advance(subj);
- break;
- case ']': // nested []
- nestlevel--;
- advance(subj);
- break;
- case '\\':
- advance(subj);
- if (ispunct(peek_char(subj))) {
- advance(subj);
- }
- break;
- default:
- advance(subj);
- }
- }
- if (c == ']') {
- if (raw_label != NULL) {
- raw = bmidstr(subj->buffer, startpos + 1, subj->pos - (startpos + 1));
- *raw_label = raw;
- }
- subj->label_nestlevel = 0;
- advance(subj); // advance past ]
- return 1;
- } else {
- if (c == 0) {
- subj->label_nestlevel = nestlevel;
- }
- subj->pos = startpos; // rewind
- return 0;
- }
+// Scans a bracketed link label starting at the '[' under subj->pos.
+// On success: stores the text between the brackets in *raw_label (when
+// raw_label is not NULL), leaves subj->pos just past the closing ']' and
+// returns 1. On failure: leaves subj->pos at the starting '[' and returns 0.
+static int link_label(subject* subj, chunk *raw_label)
+{
+ int nestlevel = 0;
+ inl* tmp = NULL;
+ int startpos = subj->pos;
+
+ if (subj->label_nestlevel) {
+ // if we've already checked to the end of the subject
+ // for a label, even with a different starting [, we
+ // know we won't find one here and we can just return.
+ // Note: nestlevel 1 would be: [foo [bar]
+ // nestlevel 2 would be: [foo [bar [baz]
+ subj->label_nestlevel--;
+ return 0;
+ }
+
+ advance(subj); // advance past [
+ char c;
+ while ((c = peek_char(subj)) && (c != ']' || nestlevel > 0)) {
+ switch (c) {
+ case '`':
+ // parsed only to skip over the code span; the result is discarded
+ tmp = handle_backticks(subj);
+ free_inlines(tmp);
+ break;
+ case '<':
+ // parsed only to skip over the autolink/tag; the result is discarded
+ tmp = handle_pointy_brace(subj);
+ free_inlines(tmp);
+ break;
+ case '[': // nested []
+ nestlevel++;
+ advance(subj);
+ break;
+ case ']': // nested []
+ nestlevel--;
+ advance(subj);
+ break;
+ case '\\':
+ // skip the backslash and, if it escapes punctuation, that byte too
+ advance(subj);
+ if (ispunct(peek_char(subj))) {
+ advance(subj);
+ }
+ break;
+ default:
+ advance(subj);
+ }
+ }
+ if (c == ']') {
+ // raw_label is optional, as it was before the chunk refactor.
+ if (raw_label != NULL) {
+ *raw_label = chunk_buf(
+ subj->buffer,
+ startpos + 1,
+ subj->pos - (startpos + 1)
+ );
+ }
+
+ subj->label_nestlevel = 0;
+ advance(subj); // advance past ]
+ return 1;
+ } else {
+ if (c == 0) {
+ // ran off the end: remember the depth so later '[' can bail out early
+ subj->label_nestlevel = nestlevel;
+ }
+ subj->pos = startpos; // rewind
+ return 0;
+ }
}
// Parse a link or the link portion of an image, or return a fallback.
static inl* handle_left_bracket(subject* subj)
{
- inl* lab = NULL;
- inl* result = NULL;
- reference* ref;
- int n;
- int sps;
- int found_label;
- int endlabel, starturl, endurl, starttitle, endtitle, endall;
- bstring url, title, rawlabel, reflabel;
- bstring rawlabel2 = NULL;
- found_label = link_label(subj, &rawlabel);
- endlabel = subj->pos;
- if (found_label) {
- if (peek_char(subj) == '(' &&
- ((sps = scan_spacechars(subj->buffer, subj->pos + 1)) > -1) &&
- ((n = scan_link_url(subj->buffer, subj->pos + 1 + sps)) > -1)) {
- // try to parse an explicit link:
- starturl = subj->pos + 1 + sps; // after (
- endurl = starturl + n;
- starttitle = endurl + scan_spacechars(subj->buffer, endurl);
- // ensure there are spaces btw url and title
- endtitle = (starttitle == endurl) ? starttitle :
- starttitle + scan_link_title(subj->buffer, starttitle);
- endall = endtitle + scan_spacechars(subj->buffer, endtitle);
- if (bchar(subj->buffer, endall) == ')') {
- subj->pos = endall + 1;
- url = bmidstr(subj->buffer, starturl, endurl - starturl);
- clean_url(url);
- title = bmidstr(subj->buffer, starttitle, endtitle - starttitle);
- clean_title(title);
- lab = parse_inlines(rawlabel, NULL);
- bdestroy(rawlabel);
- return make_link(lab, url, title);
- } else {
- // if we get here, we matched a label but didn't get further:
- subj->pos = endlabel;
- lab = parse_inlines(rawlabel, subj->reference_map);
- bdestroy(rawlabel);
- result = append_inlines(make_str(bfromcstr("[")),
- append_inlines(lab,
- make_str(bfromcstr("]"))));
- return result;
- }
- } else {
- // Check for reference link.
- // First, see if there's another label:
- subj->pos = subj->pos + scan_spacechars(subj->buffer, endlabel);
- reflabel = rawlabel;
- // if followed by a nonempty link label, we change reflabel to it:
- if (peek_char(subj) == '[' &&
- link_label(subj, &rawlabel2)) {
- if (blength(rawlabel2) > 0) {
- reflabel = rawlabel2;
- }
- } else {
- subj->pos = endlabel;
- }
- // lookup rawlabel in subject->reference_map:
- ref = lookup_reference(subj->reference_map, reflabel);
- if (ref != NULL) { // found
- lab = parse_inlines(rawlabel, NULL);
- result = make_link(lab, bstrcpy(ref->url), bstrcpy(ref->title));
- } else {
- subj->pos = endlabel;
- lab = parse_inlines(rawlabel, subj->reference_map);
- result = append_inlines(make_str(bfromcstr("[")),
- append_inlines(lab, make_str(bfromcstr("]"))));
- }
- bdestroy(rawlabel);
- bdestroy(rawlabel2);
- return result;
- }
- }
- // If we fall through to here, it means we didn't match a link:
- advance(subj); // advance past [
- return make_str(bfromcstr("["));
+ // Three outcomes: an explicit link "[label](url title)", a reference
+ // link "[label]" / "[label][ref]", or a literal "[" fallback.
+ inl *lab = NULL;
+ inl *result = NULL;
+ reference *ref;
+ int n;
+ int sps;
+ int found_label;
+ // offsets into subj->buffer delimiting the pieces of an explicit link
+ int endlabel, starturl, endurl, starttitle, endtitle, endall;
+
+ chunk rawlabel;
+ chunk url, title;
+
+ found_label = link_label(subj, &rawlabel);
+ endlabel = subj->pos;
+
+ if (found_label) {
+ if (peek_char(subj) == '(' &&
+ ((sps = scan_spacechars(subj->buffer, subj->pos + 1)) > -1) &&
+ ((n = scan_link_url(subj->buffer, subj->pos + 1 + sps)) > -1)) {
+
+ // try to parse an explicit link:
+ starturl = subj->pos + 1 + sps; // after (
+ endurl = starturl + n;
+ starttitle = endurl + scan_spacechars(subj->buffer, endurl);
+
+ // ensure there are spaces btw url and title
+ endtitle = (starttitle == endurl) ? starttitle :
+ starttitle + scan_link_title(subj->buffer, starttitle);
+
+ endall = endtitle + scan_spacechars(subj->buffer, endtitle);
+
+ if (gh_buf_at(subj->buffer, endall) == ')') {
+ subj->pos = endall + 1;
+
+ // NOTE(review): url and title are passed on as raw chunks;
+ // clean_url()/clean_title() are not called here. Presumably
+ // make_link() does the cleaning -- confirm.
+ url = chunk_buf(subj->buffer, starturl, endurl - starturl);
+ title = chunk_buf(subj->buffer, starttitle, endtitle - starttitle);
+ // the label of an explicit link is parsed without a reference map
+ lab = parse_chunk_inlines(&rawlabel, NULL);
+
+ return make_link(lab, url, title);
+ } else {
+ // if we get here, we matched a label but didn't get further:
+ subj->pos = endlabel;
+ lab = parse_chunk_inlines(&rawlabel, subj->reference_map);
+ result = append_inlines(make_str(chunk_literal("[")),
+ append_inlines(lab,
+ make_str(chunk_literal("]"))));
+ return result;
+ }
+ } else {
+ chunk rawlabel_tmp;
+ chunk reflabel;
+
+ // Check for reference link.
+ // First, see if there's another label:
+ subj->pos = subj->pos + scan_spacechars(subj->buffer, endlabel);
+ reflabel = rawlabel;
+
+ // if followed by a nonempty link label, we change reflabel to it:
+ if (peek_char(subj) == '[' && link_label(subj, &rawlabel_tmp)) {
+ if (rawlabel_tmp.len > 0)
+ reflabel = rawlabel_tmp;
+ } else {
+ subj->pos = endlabel;
+ }
+
+ // look up reflabel (the second label if nonempty, otherwise the
+ // first) in subj->reference_map:
+ ref = lookup_reference(subj->reference_map, &reflabel);
+ if (ref != NULL) { // found
+ lab = parse_chunk_inlines(&rawlabel, NULL);
+ result = make_link(lab, chunk_literal(ref->url), chunk_literal(ref->title));
+ } else {
+ // no such reference: emit the brackets and label as plain inlines
+ subj->pos = endlabel;
+ lab = parse_chunk_inlines(&rawlabel, subj->reference_map);
+ result = append_inlines(make_str(chunk_literal("[")),
+ append_inlines(lab, make_str(chunk_literal("]"))));
+ }
+ return result;
+ }
+ }
+ // If we fall through to here, it means we didn't match a link:
+ advance(subj); // advance past [
+ return make_str(chunk_literal("["));
}
// Parse a hard or soft linebreak, returning an inline.
// Assumes the subject has a newline at the current position.
static inl* handle_newline(subject *subj)
{
- int nlpos = subj->pos;
- // skip over newline
- advance(subj);
- // skip spaces at beginning of line
- while (peek_char(subj) == ' ') {
- advance(subj);
- }
- if (nlpos > 1 &&
- bchar(subj->buffer, nlpos - 1) == ' ' &&
- bchar(subj->buffer, nlpos - 2) == ' ') {
- return make_linebreak();
- } else {
- return make_softbreak();
- }
+ // remember where the newline was so the bytes before it can be inspected
+ int nlpos = subj->pos;
+ // skip over newline
+ advance(subj);
+ // skip spaces at beginning of line
+ while (peek_char(subj) == ' ') {
+ advance(subj);
+ }
+ // two spaces directly before the newline make a hard break;
+ // nlpos > 1 keeps the nlpos - 2 index from going negative
+ if (nlpos > 1 &&
+ gh_buf_at(subj->buffer, nlpos - 1) == ' ' &&
+ gh_buf_at(subj->buffer, nlpos - 2) == ' ') {
+ return make_linebreak();
+ } else {
+ return make_softbreak();
+ }
}
+// Predicate for parse_inlines_while(): the logical negation of is_eof(subj).
inline static int not_eof(subject* subj)
{
- return !is_eof(subj);
+ return !is_eof(subj);
}
// Parse inlines while a predicate is satisfied. Return inlines.
extern inl* parse_inlines_while(subject* subj, int (*f)(subject*))
{
- inl* result = NULL;
- inl** last = &result;
- while ((*f)(subj) && parse_inline(subj, last)) {
- }
- return result;
+ inl* result = NULL;
+ // `last` always points at `result`; parse_inline() stores the first
+ // inline through it and appends later ones to that list.
+ inl** last = &result;
+ // All the work happens in the condition: stop when the predicate f
+ // fails or parse_inline() returns 0. The loop body is intentionally empty.
+ while ((*f)(subj) && parse_inline(subj, last)) {
+ }
+ return result;
+}
+
+// Parses the bytes of `chunk` as inlines: copies them into a temporary
+// gh_buf, runs the inline parser over that buffer from position 0 with
+// `refmap`, frees the temporary buffer and returns the parsed list.
+inl *parse_chunk_inlines(chunk *chunk, reference** refmap)
+{
+ inl *result;
+ subject subj;
+ gh_buf full_chunk = GH_BUF_INIT;
+
+ gh_buf_set(&full_chunk, chunk->data, chunk->len);
+ init_subject(&subj, &full_chunk, 0, refmap);
+ result = parse_inlines_while(&subj, not_eof);
+
+ // NOTE(review): full_chunk is freed before `result` is returned. If the
+ // inlines keep chunks that point into subj.buffer (chunk_buf() is used
+ // that way by the handlers in this file), those pointers would dangle
+ // after this call -- confirm against chunk_buf()/make_str().
+ gh_buf_free(&full_chunk);
+ return result;
+}
+
+// Returns the index of the first special inline character strictly after
+// subj->pos, or -1 if there is none before the end of the buffer.
+// The byte at subj->pos itself is never examined.
+static int find_special_char(subject *subj)
+{
+ int n = subj->pos + 1;
+ int size = (int)gh_buf_len(subj->buffer);
+
+ // Advance n on every miss; without the increment the loop never
+ // terminates when the byte at n is not special.
+ // strchr() also matches the string terminator, so a 0 byte counts as
+ // special as well.
+ for (; n < size; ++n) {
+ if (strchr("\n\\`&_*[]<!", gh_buf_at(subj->buffer, n)))
+ return n;
+ }
+
+ return -1;
}
// Parse an inline, advancing subject, and add it to last element.
// Adjust tail to point to new last element of list.
// Return 0 if no inline can be parsed, 1 otherwise.
-extern int parse_inline(subject* subj, inl ** last)
-{
- inl* new = NULL;
- bstring contents;
- bstring special_chars;
- unsigned char c;
- int endpos;
- c = peek_char(subj);
- if (c == 0) {
- return 0;
- }
- switch(c){
- case '\n':
- new = handle_newline(subj);
- break;
- case '`':
- new = handle_backticks(subj);
- break;
- case '\\':
- new = handle_backslash(subj);
- break;
- case '&':
- new = handle_entity(subj);
- break;
- case '<':
- new = handle_pointy_brace(subj);
- break;
- case '_':
- if (subj->pos > 0 && (isalnum(bchar(subj->buffer, subj->pos - 1)) ||
- bchar(subj->buffer, subj->pos - 1) == '_')) {
- new = make_str(take_one(subj));
- } else {
- new = handle_strong_emph(subj, '_');
- }
- break;
- case '*':
- new = handle_strong_emph(subj, '*');
- break;
- case '[':
- new = handle_left_bracket(subj);
- break;
- case '!':
- advance(subj);
- if (peek_char(subj) == '[') {
- new = handle_left_bracket(subj);
- if (new != NULL && new->tag == link) {
- new->tag = image;
- } else {
- new = append_inlines(make_str(bfromcstr("!")), new);
- }
- } else {
- new = make_str(bfromcstr("!"));
- }
- break;
- default:
- // we read until we hit a special character
- special_chars = bfromcstr("\n\\`&_*[]<!");
- endpos = binchr(subj->buffer, subj->pos, special_chars);
- bdestroy(special_chars);
- if (endpos == subj->pos) {
- // current char is special: read a 1-character str
- contents = take_one(subj);
- } else if (endpos == BSTR_ERR) {
- // special char not found, take whole rest of buffer:
- endpos = subj->buffer->slen;
- contents = bmidstr(subj->buffer, subj->pos, endpos - subj->pos);
- subj->pos = endpos;
- } else {
- // take buffer from subj->pos to endpos to str.
- contents = bmidstr(subj->buffer, subj->pos, endpos - subj->pos);
- subj->pos = endpos;
- // if we're at a newline, strip trailing spaces.
- if (peek_char(subj) == '\n') {
- brtrimws(contents);
- }
- }
- new = make_str(contents);
- }
- if (*last == NULL) {
- *last = new;
- } else {
- append_inlines(*last, new);
- }
- return 1;
-}
-
-extern inl* parse_inlines(bstring input, reference** refmap)
-{
- subject * subj = make_subject(input, refmap);
- inl * result = parse_inlines_while(subj, not_eof);
- free(subj);
- return result;
+// Parses one inline at subj->pos and links it into the list at *last:
+// *last is set when the list is empty, otherwise the new inline is appended.
+// Returns 0 when peek_char() yields 0 (nothing left to parse), 1 otherwise.
+static int parse_inline(subject* subj, inl ** last)
+{
+ inl* new = NULL;
+ chunk contents;
+ unsigned char c;
+ int endpos;
+ c = peek_char(subj);
+ if (c == 0) {
+ return 0;
+ }
+ switch(c){
+ case '\n':
+ new = handle_newline(subj);
+ break;
+ case '`':
+ new = handle_backticks(subj);
+ break;
+ case '\\':
+ new = handle_backslash(subj);
+ break;
+ case '&':
+ new = handle_entity(subj);
+ break;
+ case '<':
+ new = handle_pointy_brace(subj);
+ break;
+ case '_':
+ // an underscore preceded by an alphanumeric or another underscore
+ // is treated as plain text instead of an emphasis marker
+ if (subj->pos > 0 && (isalnum(gh_buf_at(subj->buffer, subj->pos - 1)) ||
+ gh_buf_at(subj->buffer, subj->pos - 1) == '_')) {
+ goto text_literal;
+ }
+
+ new = handle_strong_emph(subj, '_');
+ break;
+ case '*':
+ new = handle_strong_emph(subj, '*');
+ break;
+ case '[':
+ new = handle_left_bracket(subj);
+ break;
+ case '!':
+ advance(subj);
+ if (peek_char(subj) == '[') {
+ new = handle_left_bracket(subj);
+ if (new != NULL && new->tag == link) {
+ // "![...]" that parsed as a link becomes an image
+ new->tag = image;
+ } else {
+ new = append_inlines(make_str(chunk_literal("!")), new);
+ }
+ } else {
+ new = make_str(chunk_literal("!"));
+ }
+ break;
+ default:
+ text_literal:
+ // take everything up to the next special character (or the end of
+ // the buffer) as one text run; the current byte is always included
+ endpos = find_special_char(subj);
+ if (endpos < 0) {
+ endpos = gh_buf_len(subj->buffer);
+ }
+
+ contents = chunk_buf(subj->buffer, subj->pos, endpos - subj->pos);
+ subj->pos = endpos;
+
+ // if we're at a newline, strip trailing spaces.
+ if (peek_char(subj) == '\n') {
+ // Right-trim only, as the code did before the chunk refactor:
+ // leading whitespace of the run is content (e.g. the space
+ // after a closing emphasis marker) and must be kept.
+ while (contents.len > 0 &&
+ isspace(contents.data[contents.len - 1])) {
+ contents.len--;
+ }
+ }
+
+ new = make_str(contents);
+ }
+ if (*last == NULL) {
+ *last = new;
+ } else {
+ append_inlines(*last, new);
+ }
+ return 1;
+}
+
+// Public entry point: sets up a stack-allocated subject over `input` with
+// init_subject() (passing input_pos and refmap through) and parses inlines
+// until not_eof() fails. Returns the list built by parse_inlines_while().
+// NOTE(review): input_pos is presumably the offset in `input` where parsing
+// starts -- confirm against init_subject().
+extern inl* parse_inlines(gh_buf *input, int input_pos, reference** refmap)
+{
+ subject subj;
+ init_subject(&subj, input, input_pos, refmap);
+ return parse_inlines_while(&subj, not_eof);
}
// Parse zero or more space characters, including at most one newline.
void spnl(subject* subj)
{
- bool seen_newline = false;
- while (peek_char(subj) == ' ' ||
- (!seen_newline &&
- (seen_newline = peek_char(subj) == '\n'))) {
- advance(subj);
- }
+ bool seen_newline = false;
+ // Spaces are always consumed. A newline is consumed only while
+ // seen_newline is still false; the assignment inside the condition
+ // records it, so a second newline ends the loop.
+ while (peek_char(subj) == ' ' ||
+ (!seen_newline &&
+ (seen_newline = peek_char(subj) == '\n'))) {
+ advance(subj);
+ }
}
// Parse reference. Assumes string begins with '[' character.
// Modify refmap if a reference is encountered.
// Return 0 if no reference found, otherwise position of subject
// after reference is parsed.
-extern int parse_reference(bstring input, reference** refmap)
-{
- subject * subj = make_subject(input, NULL);
- bstring lab = NULL;
- bstring url = NULL;
- bstring title = NULL;
- int matchlen = 0;
- int beforetitle;
- reference * new = NULL;
- int newpos;
-
- // parse label:
- if (!link_label(subj, &lab)) {
- free(subj);
- return 0;
- }
- // colon:
- if (peek_char(subj) == ':') {
- advance(subj);
- } else {
- free(subj);
- bdestroy(lab);
- return 0;
- }
- // parse link url:
- spnl(subj);
- matchlen = scan_link_url(subj->buffer, subj->pos);
- if (matchlen) {
- url = bmidstr(subj->buffer, subj->pos, matchlen);
- clean_url(url);
- subj->pos += matchlen;
- } else {
- free(subj);
- bdestroy(lab);
- bdestroy(url);
- return 0;
- }
- // parse optional link_title
- beforetitle = subj->pos;
- spnl(subj);
- matchlen = scan_link_title(subj->buffer, subj->pos);
- if (matchlen) {
- title = bmidstr(subj->buffer, subj->pos, matchlen);
- clean_title(title);
- subj->pos += matchlen;
- } else {
- subj->pos = beforetitle;
- title = bfromcstr("");
- }
- // parse final spaces and newline:
- while (peek_char(subj) == ' ') {
- advance(subj);
- }
- if (peek_char(subj) == '\n') {
- advance(subj);
- } else if (peek_char(subj) != 0) {
- free(subj);
- bdestroy(lab);
- bdestroy(url);
- bdestroy(title);
- return 0;
- }
- // insert reference into refmap
- new = make_reference(lab, url, title);
- add_reference(refmap, new);
-
- newpos = subj->pos;
- free(subj);
- bdestroy(lab);
- bdestroy(url);
- bdestroy(title);
- return newpos;
+// Tries to parse "[label]: url optional-title" from `input`.
+// On success adds a new reference to *refmap and returns subj.pos after the
+// definition; returns 0 as soon as any required part is missing.
+// NOTE(review): input_pos is presumably the offset in `input` where parsing
+// starts -- confirm against init_subject().
+extern int parse_reference(gh_buf *input, int input_pos, reference** refmap)
+{
+ subject subj;
+
+ chunk lab;
+ chunk url;
+ chunk title;
+
+ int matchlen = 0;
+ int beforetitle;
+ reference * new = NULL;
+
+ // no reference map is needed while scanning the definition itself
+ init_subject(&subj, input, input_pos, NULL);
+
+ // parse label:
+ if (!link_label(&subj, &lab))
+ return 0;
+
+ // colon:
+ if (peek_char(&subj) == ':') {
+ advance(&subj);
+ } else {
+ return 0;
+ }
+
+ // parse link url:
+ spnl(&subj);
+ matchlen = scan_link_url(subj.buffer, subj.pos);
+ if (matchlen) {
+ url = chunk_buf(subj.buffer, subj.pos, matchlen);
+ subj.pos += matchlen;
+ } else {
+ return 0;
+ }
+
+ // parse optional link_title
+ beforetitle = subj.pos;
+ spnl(&subj);
+ matchlen = scan_link_title(subj.buffer, subj.pos);
+ if (matchlen) {
+ title = chunk_buf(subj.buffer, subj.pos, matchlen);
+ subj.pos += matchlen;
+ } else {
+ // no title: undo the whitespace skipped by spnl() and use an empty one
+ subj.pos = beforetitle;
+ title = chunk_literal("");
+ }
+ // parse final spaces and newline:
+ while (peek_char(&subj) == ' ') {
+ advance(&subj);
+ }
+ // the definition must be followed by a newline or the end of input
+ if (peek_char(&subj) == '\n') {
+ advance(&subj);
+ } else if (peek_char(&subj) != 0) {
+ return 0;
+ }
+ // insert reference into refmap
+ new = make_reference(&lab, &url, &title);
+ add_reference(refmap, new);
+
+ return subj.pos;
}
diff --git a/src/main.c b/src/main.c
index f0ecb82..9e0a3c8 100644
--- a/src/main.c
+++ b/src/main.c
@@ -88,7 +88,7 @@ int main(int argc, char *argv[]) {
print_blocks(cur, 0);
} else {
check(blocks_to_html(cur, &html, false) == 0, "could not format as HTML");
- printf("%s", html->data);
+ // printf("%s", html->data);
bdestroy(html);
}
free_blocks(cur);
diff --git a/src/scanners.h b/src/scanners.h
index 71e0520..b6e586b 100644
--- a/src/scanners.h
+++ b/src/scanners.h
@@ -1,15 +1,15 @@
-#include "bstrlib.h"
+#include "buffer.h"
-int scan_autolink_uri(bstring s, int pos);
-int scan_autolink_email(bstring s, int pos);
-int scan_html_tag(bstring s, int pos);
-int scan_html_block_tag(bstring s, int pos);
-int scan_link_url(bstring s, int pos);
-int scan_link_title(bstring s, int pos);
-int scan_spacechars(bstring s, int pos);
-int scan_atx_header_start(bstring s, int pos);
-int scan_setext_header_line(bstring s, int pos);
-int scan_hrule(bstring s, int pos);
-int scan_open_code_fence(bstring s, int pos);
-int scan_close_code_fence(bstring s, int pos, int len);
-int scan_entity(bstring s, int pos);
+int scan_autolink_uri(const gh_buf *s, int pos);
+int scan_autolink_email(const gh_buf *s, int pos);
+int scan_html_tag(const gh_buf *s, int pos);
+int scan_html_block_tag(const gh_buf *s, int pos);
+int scan_link_url(const gh_buf *s, int pos);
+int scan_link_title(const gh_buf *s, int pos);
+int scan_spacechars(const gh_buf *s, int pos);
+int scan_atx_header_start(const gh_buf *s, int pos);
+int scan_setext_header_line(const gh_buf *s, int pos);
+int scan_hrule(const gh_buf *s, int pos);
+int scan_open_code_fence(const gh_buf *s, int pos);
+int scan_close_code_fence(const gh_buf *s, int pos, int len);
+int scan_entity(const gh_buf *s, int pos);
diff --git a/src/scanners.re b/src/scanners.re
index 305d1ea..7323ef9 100644
--- a/src/scanners.re
+++ b/src/scanners.re
@@ -1,4 +1,4 @@
-#include "bstrlib.h"
+#include "buffer.h"
/*!re2c
re2c:define:YYCTYPE = "unsigned char";
@@ -55,10 +55,10 @@
*/
// Try to match URI autolink after first <, returning number of chars matched.
-extern int scan_autolink_uri(bstring s, int pos)
+extern int scan_autolink_uri(const gh_buf *s, int pos)
{
unsigned char * marker = NULL;
- unsigned char * p = &(s->data[pos]);
+ unsigned char * p = &(s->ptr[pos]);
unsigned char * start = p;
/*!re2c
scheme [:]([^\x00-\x20<>\\]|escaped_char)*[>] { return (p - start); }
@@ -67,10 +67,10 @@ extern int scan_autolink_uri(bstring s, int pos)
}
// Try to match email autolink after first <, returning num of chars matched.
-extern int scan_autolink_email(bstring s, int pos)
+extern int scan_autolink_email(const gh_buf *s, int pos)
{
unsigned char * marker = NULL;
- unsigned char * p = &(s->data[pos]);
+ unsigned char * p = &(s->ptr[pos]);
unsigned char * start = p;
/*!re2c
[a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+
@@ -83,10 +83,10 @@ extern int scan_autolink_email(bstring s, int pos)
}
// Try to match an HTML tag after first <, returning num of chars matched.
-extern int scan_html_tag(bstring s, int pos)
+extern int scan_html_tag(const gh_buf *s, int pos)
{
unsigned char * marker = NULL;
- unsigned char * p = &(s->data[pos]);
+ unsigned char * p = &(s->ptr[pos]);
unsigned char * start = p;
/*!re2c
htmltag { return (p - start); }
@@ -96,10 +96,10 @@ extern int scan_html_tag(bstring s, int pos)
// Try to match an HTML block tag including first <,
// returning num of chars matched.
-extern int scan_html_block_tag(bstring s, int pos)
+extern int scan_html_block_tag(const gh_buf *s, int pos)
{
unsigned char * marker = NULL;
- unsigned char * p = &(s->data[pos]);
+ unsigned char * p = &(s->ptr[pos]);
unsigned char * start = p;
/*!re2c
[<] [/] blocktagname (spacechar | [>]) { return (p - start); }
@@ -113,10 +113,10 @@ extern int scan_html_block_tag(bstring s, int pos)
// This may optionally be contained in <..>; otherwise
// whitespace and unbalanced right parentheses aren't allowed.
// Newlines aren't ever allowed.
-extern int scan_link_url(bstring s, int pos)
+extern int scan_link_url(const gh_buf *s, int pos)
{
unsigned char * marker = NULL;
- unsigned char * p = &(s->data[pos]);
+ unsigned char * p = &(s->ptr[pos]);
unsigned char * start = p;
/*!re2c
[ \n]* [<] ([^<>\n\\\x00] | escaped_char | [\\])* [>] { return (p - start); }
@@ -128,10 +128,10 @@ extern int scan_link_url(bstring s, int pos)
// Try to match a link title (in single quotes, in double quotes, or
// in parentheses), returning number of chars matched. Allow one
// level of internal nesting (quotes within quotes).
-extern int scan_link_title(bstring s, int pos)
+extern int scan_link_title(const gh_buf *s, int pos)
{
unsigned char * marker = NULL;
- unsigned char * p = &(s->data[pos]);
+ unsigned char * p = &(s->ptr[pos]);
unsigned char * start = p;
/*!re2c
["] (escaped_char|[^"\x00])* ["] { return (p - start); }
@@ -142,9 +142,9 @@ extern int scan_link_title(bstring s, int pos)
}
// Match space characters, including newlines.
-extern int scan_spacechars(bstring s, int pos)
+extern int scan_spacechars(const gh_buf *s, int pos)
{
- unsigned char * p = &(s->data[pos]);
+ unsigned char * p = &(s->ptr[pos]);
unsigned char * start = p;
/*!re2c
[ \t\n]* { return (p - start); }
@@ -153,10 +153,10 @@ extern int scan_spacechars(bstring s, int pos)
}
// Match ATX header start.
-extern int scan_atx_header_start(bstring s, int pos)
+extern int scan_atx_header_start(const gh_buf *s, int pos)
{
unsigned char * marker = NULL;
- unsigned char * p = &(s->data[pos]);
+ unsigned char * p = &(s->ptr[pos]);
unsigned char * start = p;
/*!re2c
[#]{1,6} ([ ]+|[\n]) { return (p - start); }
@@ -166,10 +166,10 @@ extern int scan_atx_header_start(bstring s, int pos)
// Match setext header line. Return 1 for level-1 header,
// 2 for level-2, 0 for no match.
-extern int scan_setext_header_line(bstring s, int pos)
+extern int scan_setext_header_line(const gh_buf *s, int pos)
{
unsigned char * marker = NULL;
- unsigned char * p = &(s->data[pos]);
+ unsigned char * p = &(s->ptr[pos]);
/*!re2c
[=]+ [ ]* [\n] { return 1; }
[-]+ [ ]* [\n] { return 2; }
@@ -180,10 +180,10 @@ extern int scan_setext_header_line(bstring s, int pos)
// Scan a horizontal rule line: "...three or more hyphens, asterisks,
// or underscores on a line by themselves. If you wish, you may use
// spaces between the hyphens or asterisks."
-extern int scan_hrule(bstring s, int pos)
+extern int scan_hrule(const gh_buf *s, int pos)
{
unsigned char * marker = NULL;
- unsigned char * p = &(s->data[pos]);
+ unsigned char * p = &(s->ptr[pos]);
unsigned char * start = p;
/*!re2c
([*][ ]*){3,} [ \t]* [\n] { return (p - start); }
@@ -194,10 +194,10 @@ extern int scan_hrule(bstring s, int pos)
}
// Scan an opening code fence.
-extern int scan_open_code_fence(bstring s, int pos)
+extern int scan_open_code_fence(const gh_buf *s, int pos)
{
unsigned char * marker = NULL;
- unsigned char * p = &(s->data[pos]);
+ unsigned char * p = &(s->ptr[pos]);
unsigned char * start = p;
/*!re2c
[`]{3,} / [^`\n\x00]*[\n] { return (p - start); }
@@ -207,10 +207,10 @@ extern int scan_open_code_fence(bstring s, int pos)
}
// Scan a closing code fence with length at least len.
-extern int scan_close_code_fence(bstring s, int pos, int len)
+extern int scan_close_code_fence(const gh_buf *s, int pos, int len)
{
unsigned char * marker = NULL;
- unsigned char * p = &(s->data[pos]);
+ unsigned char * p = &(s->ptr[pos]);
unsigned char * start = p;
/*!re2c
([`]{3,} | [~]{3,}) / spacechar* [\n]
@@ -225,10 +225,10 @@ extern int scan_close_code_fence(bstring s, int pos, int len)
// Scans an entity.
// Returns number of chars matched.
-extern int scan_entity(bstring s, int pos)
+extern int scan_entity(const gh_buf *s, int pos)
{
unsigned char * marker = NULL;
- unsigned char * p = &(s->data[pos]);
+ unsigned char * p = &(s->ptr[pos]);
unsigned char * start = p;
/*!re2c
[&] ([#] ([Xx][A-Fa-f0-9]{1,8}|[0-9]{1,8}) |[A-Za-z][A-Za-z0-9]{1,31} ) [;]
diff --git a/src/stmd.h b/src/stmd.h
index 5e34399..eb1b989 100644
--- a/src/stmd.h
+++ b/src/stmd.h
@@ -1,38 +1,38 @@
#include <stdbool.h>
-#include "bstrlib.h"
+#include "buffer.h"
#include "uthash.h"
#define VERSION "0.1"
#define CODE_INDENT 4
+// A run of bytes described by pointer and length.
+typedef struct {
+ const unsigned char *data; // first byte of the run
+ int len; // number of bytes in the run
+ int alloc; // NOTE(review): presumably nonzero when `data` is owned by the chunk -- confirm
+} chunk;
+
typedef struct Inline {
- enum { str, softbreak, linebreak, code, raw_html, entity,
- emph, strong, link, image } tag;
- union {
- bstring literal;
- struct Inline* inlines;
- struct { struct Inline* label;
- bstring url;
- bstring title;
- } linkable;
- } content;
- struct Inline* next;
+ // kind of inline node
+ enum { str, softbreak, linebreak, code, raw_html, entity,
+ emph, strong, link, image } tag;
+ // payload; NOTE(review): which member is valid presumably depends on
+ // `tag` (linkable for link/image) -- confirm against the constructors
+ union {
+ chunk literal;
+ struct Inline *inlines;
+ struct {
+ struct Inline *label;
+ unsigned char *url;
+ unsigned char *title;
+ } linkable;
+ } content;
+ // next node in the singly linked list of inlines
+ struct Inline *next;
} inl;
+// One link reference definition (label, url, title), stored in a uthash table.
typedef struct Reference {
- bstring label;
- bstring url;
- bstring title;
+ unsigned char *label;
+ unsigned char *url;
+ unsigned char *title;
UT_hash_handle hh; // used by uthash
} reference;
-typedef struct Subject {
- bstring buffer;
- int pos;
- reference** reference_map;
- int label_nestlevel;
-} subject;
-
// Types for blocks
struct ListData {
@@ -51,7 +51,7 @@ struct FencedCodeData {
int fence_length;
int fence_offset;
char fence_char;
- bstring info;
+ gh_buf info;
};
typedef struct Block {
@@ -77,7 +77,8 @@ typedef struct Block {
struct Block* last_child;
struct Block* parent;
struct Block* top;
- bstring string_content;
+ gh_buf string_content;
+ int string_pos;
inl* inline_content;
union {
struct ListData list_data;
@@ -89,33 +90,34 @@ typedef struct Block {
struct Block * prev;
} block;
-int parse_inline(subject* subj, inl ** last);
-inl* parse_inlines(bstring input, reference** refmap);
-inl* parse_inlines_while(subject* subj, int (*f)(subject*));
+inl* parse_inlines(gh_buf *input, int input_pos, reference** refmap);
void free_inlines(inl* e);
-int parse_reference(bstring input, reference** refmap);
+
+int parse_reference(gh_buf *input, int input_pos, reference** refmap);
void free_reference(reference *ref);
void free_reference_map(reference **refmap);
-reference* make_reference(bstring label, bstring url, bstring title);
-reference* lookup_reference(reference** refmap, bstring label);
+
void add_reference(reference** refmap, reference* ref);
-int unescape(bstring s);
+void unescape_buffer(gh_buf *buf);
extern block* make_document();
extern block* add_child(block* parent,
int block_type, int start_line, int start_column);
void free_blocks(block* e);
+block *stmd_parse_document(const char *buffer, size_t len);
+
// FOR NOW:
-int process_inlines(block* cur, reference** refmap);
-int incorporate_line(bstring ln, int line_number, block** curptr);
-int finalize(block* b, int line_number);
+void process_inlines(block* cur, reference** refmap);
+void incorporate_line(gh_buf *ln, int line_number, block** curptr);
+void finalize(block* b, int line_number);
void print_inlines(inl* ils, int indent);
void print_blocks(block* blk, int indent);
-int blocks_to_html(block* b, bstring* result, bool tight);
-int inlines_to_html(inl* b, bstring* result);
+/* TODO */
+// int blocks_to_html(block* b, bstring* result, bool tight);
+// int inlines_to_html(inl* b, bstring* result);
-int bdetab(bstring s, int utf8);
+void utf8proc_case_fold(gh_buf *dest, const unsigned char *str, int len);
diff --git a/src/utf8.c b/src/utf8.c
index 4bb3b35..1a5df9e 100644
--- a/src/utf8.c
+++ b/src/utf8.c
@@ -2,105 +2,142 @@
#include "bstrlib.h"
#include "debug.h"
-#define advance(s) \
- s++; \
- check(*s >> 6 == 0x02, "UTF-8 decode error on byte %x", *s);
-
-// Reads a unicode code point from a UTF8-encoded string, and
-// puts it in the pointer n. If something illegal
-// is encountered, 0xFFFD is emitted.
-// Returns a pointer to next position in string, or NULL if no
-// more characters remain.
-extern unsigned char * from_utf8(unsigned char * s, unsigned int *n)
+static const int8_t utf8proc_utf8class[256] = { /* indexed by lead byte: total sequence length in bytes, 0 = not a valid lead byte */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x00-0x0F */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x10-0x1F */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x20-0x2F */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x30-0x3F */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40-0x4F */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x50-0x5F */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60-0x6F */
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x70-0x7F: single-byte (ASCII) */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x80-0x8F */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x90-0x9F */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xA0-0xAF */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0xB0-0xBF: 10xxxxxx trailing bytes, never a lead byte */
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0xC0-0xCF */
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, /* 0xD0-0xDF: two-byte lead */
+ 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, /* 0xE0-0xEF: three-byte lead */
+ 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0 }; /* 0xF0-0xF7: four-byte lead; 0xF8-0xFF: rejected */
+/*
+ * Returns the byte length (1-4) of the UTF-8 sequence starting at str[0].
+ * Returns 0 when str_len is 0, and -1 when the sequence is malformed:
+ * str[0] is not a valid lead byte, the sequence would extend past a
+ * non-negative str_len, or a trailing byte is not of the form 10xxxxxx.
+ * A negative str_len disables the length bound check.
+ */
+ssize_t utf8proc_charlen(const uint8_t *str, ssize_t str_len)
{
- int x = 0;
-
- if (*s == 0) {
- return NULL;
- } else if (*s < 0x80) {
- x = *s;
- } else if (*s >> 5 == 0x06) {
- x = *s & 0x1F;
- advance(s);
- x = (x << 6) + (*s & 0x3F);
- } else if (*s >> 4 == 0x0E) {
- x = *s & 0x0F;
- advance(s);
- x = (x << 6) + (*s & 0x3F);
- advance(s);
- x = (x << 6) + (*s & 0x3F);
- } else if (*s >> 3 == 0x1E) {
- x = *s & 0x07;
- advance(s);
- x = (x << 6) + (*s & 0x3F);
- advance(s);
- x = (x << 6) + (*s & 0x3F);
- advance(s);
- x = (x << 6) + (*s & 0x3F);
- } else if (*s >> 2 == 0x3E) {
- x = *s & 0x03;
- advance(s);
- x = (x << 6) + (*s & 0x3F);
- advance(s);
- x = (x << 6) + (*s & 0x3F);
- advance(s);
- x = (x << 6) + (*s & 0x3F);
- advance(s);
- x = (x << 6) + (*s & 0x3F);
- } else {
- log_err("UTF-8 decode error on byte %x", *s);
- goto error;
- }
- *n = x;
- s++;
- return s;
- error:
- *n = 0xFFFD;
- return s;
+ ssize_t length, i;
+
+ if (!str_len)
+ return 0;
+
+ length = utf8proc_utf8class[str[0]]; /* expected total length from the lead byte */
+
+ if (!length) /* class 0: byte cannot start a sequence */
+ return -1;
+
+ if (str_len >= 0 && length > str_len) /* truncated input; only checked when a length was supplied */
+ return -1;
+
+ for (i = 1; i < length; i++) {
+ if ((str[i] & 0xC0) != 0x80) /* every trailing byte must be 10xxxxxx */
+ return -1;
+ }
+
+ return length;
+}
+/*
+ * Decodes the UTF-8 sequence at str into *dst and returns the number of
+ * bytes it occupies. On any failure returns -1 and leaves *dst set to -1.
+ * Besides the structural checks done by utf8proc_charlen, the decoded value
+ * is rejected when it is an overlong encoding, lies in 0xD800-0xDFFF or
+ * 0xFDD0-0xFDEF, is 0x110000 or above, or has low 16 bits equal to 0xFFFE
+ * or 0xFFFF.
+ */
+ssize_t utf8proc_iterate(const uint8_t *str, ssize_t str_len, int32_t *dst)
+{
+ ssize_t length;
+ int32_t uc = -1;
+
+ *dst = -1;
+ length = utf8proc_charlen(str, str_len);
+ if (length < 0)
+ return -1;
+
+ switch (length) { /* length 0 (empty input) matches no case, so uc stays -1 */
+ case 1:
+ uc = str[0];
+ break;
+ case 2:
+ uc = ((str[0] & 0x1F) << 6) + (str[1] & 0x3F);
+ if (uc < 0x80) uc = -1; /* overlong: fits in one byte */
+ break;
+ case 3:
+ uc = ((str[0] & 0x0F) << 12) + ((str[1] & 0x3F) << 6)
+ + (str[2] & 0x3F);
+ if (uc < 0x800 || (uc >= 0xD800 && uc < 0xE000) ||
+ (uc >= 0xFDD0 && uc < 0xFDF0)) uc = -1; /* overlong, 0xD800-0xDFFF, or 0xFDD0-0xFDEF */
+ break;
+ case 4:
+ uc = ((str[0] & 0x07) << 18) + ((str[1] & 0x3F) << 12)
+ + ((str[2] & 0x3F) << 6) + (str[3] & 0x3F);
+ if (uc < 0x10000 || uc >= 0x110000) uc = -1; /* overlong or above 0x10FFFF */
+ break;
+ }
+
+ if (uc < 0 || ((uc & 0xFFFF) >= 0xFFFE)) /* also rejects values ending in FFFE/FFFF */
+ return -1;
+
+ *dst = uc;
+ return length;
}
-// Converts the unicode code point c to UTF-8,
-// putting the result in dest. Returns 0 on success, -1 on error.
-extern int to_utf8(unsigned int c, bstring dest)
+/*
+ * Appends the UTF-8 encoding of code point `uc` to `buf` via gh_buf_put.
+ *
+ * 0xFFFE and 0xFFFF are special-cased and written as the single raw bytes
+ * 0xFE and 0xFF respectively. Values in 0xD800-0xDFFF are not rejected
+ * here; they are encoded as ordinary three-byte sequences.
+ *
+ * Negative values and values of 0x110000 or above trip assert(false); if
+ * assertions are compiled out, gh_buf_put is called with a length of 0.
+ */
+void utf8_encode_char(int32_t uc, gh_buf *buf)
{
- if (c < 0x80) {
- bconchar(dest, c);
- } else if (c < 0x800) {
- bconchar(dest, 192 + c/64);
- bconchar(dest, 128 + c%64);
- } else if (c - 0xd800u < 0x800) {
- goto error;
- } else if (c < 0x10000) {
- bconchar(dest, 224 + c / 4096);
- bconchar(dest, 128 + c /64%64);
- bconchar(dest, 128 + c%64);
- } else if (c < 0x110000) {
- bconchar(dest, 240 + c/262144);
- bconchar(dest, 128 + c/4096%64);
- bconchar(dest, 128 + c/64%64);
- bconchar(dest, 128 + c%64);
- } else {
- goto error;
- }
- return 0;
-error:
- return -1;
+ char dst[4];
+ int len = 0;
+
+ if (uc < 0x00) {
+ assert(false);
+ } else if (uc < 0x80) {
+ dst[0] = uc;
+ len = 1;
+ } else if (uc < 0x800) {
+ dst[0] = 0xC0 + (uc >> 6);
+ dst[1] = 0x80 + (uc & 0x3F);
+ len = 2;
+ } else if (uc == 0xFFFF) {
+ dst[0] = 0xFF;
+ /* was `return 1;`: invalid in a void function and skipped the write below */
+ len = 1;
+ } else if (uc == 0xFFFE) {
+ dst[0] = 0xFE;
+ len = 1;
+ } else if (uc < 0x10000) {
+ dst[0] = 0xE0 + (uc >> 12);
+ dst[1] = 0x80 + ((uc >> 6) & 0x3F);
+ dst[2] = 0x80 + (uc & 0x3F);
+ len = 3;
+ } else if (uc < 0x110000) {
+ dst[0] = 0xF0 + (uc >> 18);
+ dst[1] = 0x80 + ((uc >> 12) & 0x3F);
+ dst[2] = 0x80 + ((uc >> 6) & 0x3F);
+ dst[3] = 0x80 + (uc & 0x3F);
+ len = 4;
+ } else {
+ assert(false);
+ }
+
+ gh_buf_put(buf, dst, len);
}
+/*
+ * Appends a case-folded copy of the `len`-byte UTF-8 string `str` to `dest`.
+ *
+ * Each code point is decoded with utf8proc_iterate and handed to the
+ * statements in case_fold_switch.inc, which emit output through bufpush().
+ * A byte that does not start a valid sequence is replaced by U+FFFD and
+ * skipped, so malformed input always makes forward progress.
+ */
+void utf8proc_case_fold(gh_buf *dest, const unsigned char *str, int len)
+{
+ int32_t c;
+
#define bufpush(x) \
- check(to_utf8(x, buf) == 0, "UTF-8 encode error on code point %04x", x)
+ utf8_encode_char(x, dest)
-// Returns the case-folded version of the source string, or NULL on error.
-extern bstring case_fold(bstring source)
-{
- unsigned char * s = source->data;
- unsigned int c = 0;
- bstring buf = bfromcstr("");
- while ((s = from_utf8(s, &c))) {
-#include "case_fold_switch.c"
- }
- return buf;
-error:
- return NULL;
+ while (len > 0) {
+ ssize_t char_len = utf8proc_iterate(str, len, &c);
+
+ if (char_len < 0) {
+ bufpush(0xFFFD);
+ /* Step over the offending byte; without this the loop never terminates. */
+ str++;
+ len--;
+ continue;
+ }
+
+#include "case_fold_switch.inc"
+
+ str += char_len;
+ len -= char_len;
+ }
}