aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNick Wellnhofer <wellnhofer@aevum.de>2014-11-29 17:05:00 +0100
committerNick Wellnhofer <wellnhofer@aevum.de>2014-11-29 17:16:31 +0100
commit46ec4da981af45591d25cb54c47a0afd061306c2 (patch)
treeeb1f9078be2db3db72a1d85804477f6255dc3baf
parentad8b39e550f2ff9a8b8e409d1654fc26135dc9fa (diff)
Push parser interface
Replace cmark_parser_process_line with cmark_parser_push that takes arbitrary chunks of data. Also fixes #211.
-rw-r--r--src/blocks.c94
-rw-r--r--src/cmark.h2
-rw-r--r--src/main.c12
-rw-r--r--src/parser.h1
4 files changed, 80 insertions, 29 deletions
diff --git a/src/blocks.c b/src/blocks.c
index 1bdf67b..dbfb21e 100644
--- a/src/blocks.c
+++ b/src/blocks.c
@@ -18,6 +18,14 @@
#define CODE_INDENT 4
#define peek_at(i, n) (i)->data[n]
+static void
+S_parser_push(cmark_parser *parser, const unsigned char *buffer, size_t len,
+ bool eof);
+
+static void
+S_process_line(cmark_parser *parser, const unsigned char *buffer,
+ size_t bytes);
+
static cmark_node* make_block(cmark_node_type tag, int start_line, int start_column)
{
cmark_node* e;
@@ -47,13 +55,16 @@ cmark_parser *cmark_parser_new()
cmark_parser *parser = (cmark_parser*)malloc(sizeof(cmark_parser));
cmark_node *document = make_document();
strbuf *line = (strbuf*)malloc(sizeof(strbuf));
+ strbuf *buf = (strbuf*)malloc(sizeof(strbuf));
cmark_strbuf_init(line, 256);
+ cmark_strbuf_init(buf, 0);
parser->refmap = cmark_reference_map_new();
parser->root = document;
parser->current = document;
parser->line_number = 0;
parser->curline = line;
+ parser->linebuf = buf;
return parser;
}
@@ -62,6 +73,8 @@ void cmark_parser_free(cmark_parser *parser)
{
cmark_strbuf_free(parser->curline);
free(parser->curline);
+ cmark_strbuf_free(parser->linebuf);
+ free(parser->linebuf);
cmark_reference_map_free(parser->refmap);
free(parser);
}
@@ -412,16 +425,16 @@ static cmark_node *finalize_document(cmark_parser *parser)
return parser->root;
}
-extern cmark_node *cmark_parse_file(FILE *f)
+cmark_node *cmark_parse_file(FILE *f)
{
- char buffer[4096];
+ unsigned char buffer[4096];
cmark_parser *parser = cmark_parser_new();
- size_t offset;
+ size_t bytes;
cmark_node *document;
- while (fgets(buffer, sizeof(buffer), f)) {
- offset = strlen(buffer);
- cmark_parser_process_line(parser, buffer, offset);
+ while ((bytes = fread(buffer, 1, sizeof(buffer), f)) > 0) {
+ bool eof = bytes < sizeof(buffer);
+ S_parser_push(parser, buffer, bytes, eof);
}
document = cmark_parser_finish(parser);
@@ -429,28 +442,61 @@ extern cmark_node *cmark_parse_file(FILE *f)
return document;
}
-extern cmark_node *cmark_parse_document(const char *buffer, size_t len)
+cmark_node *cmark_parse_document(const char *buffer, size_t len)
{
- int linenum = 1;
- const char *end = buffer + len;
- size_t offset;
cmark_parser *parser = cmark_parser_new();
cmark_node *document;
- while (buffer < end) {
- const char *eol
- = (const char *)memchr(buffer, '\n', end - buffer);
- offset = eol ? (eol - buffer) + 1 : end - buffer;
- cmark_parser_process_line(parser, buffer, offset);
- buffer += offset;
- linenum++;
- }
+ S_parser_push(parser, (const unsigned char *)buffer, len, true);
document = cmark_parser_finish(parser);
cmark_parser_free(parser);
return document;
}
+void
+cmark_parser_push(cmark_parser *parser, const char *buffer, size_t len)
+{
+ S_parser_push(parser, (const unsigned char *)buffer, len, false);
+}
+
+static void
+S_parser_push(cmark_parser *parser, const unsigned char *buffer, size_t len,
+ bool eof)
+{
+ const unsigned char *end = buffer + len;
+
+ while (buffer < end) {
+ const unsigned char *eol
+ = (const unsigned char *)memchr(buffer, '\n',
+ end - buffer);
+ size_t line_len;
+
+ if (eol) {
+ line_len = eol + 1 - buffer;
+ }
+ else if (eof) {
+ line_len = end - buffer;
+ }
+ else {
+ strbuf_put(parser->linebuf, buffer, end - buffer);
+ break;
+ }
+
+ if (parser->linebuf->size > 0) {
+ strbuf_put(parser->linebuf, buffer, line_len);
+ S_process_line(parser, parser->linebuf->ptr,
+ parser->linebuf->size);
+ strbuf_clear(parser->linebuf);
+ }
+ else {
+ S_process_line(parser, buffer, line_len);
+ }
+
+ buffer += line_len;
+ }
+}
+
static void chop_trailing_hashtags(chunk *ch)
{
int n, orig_n;
@@ -469,8 +515,8 @@ static void chop_trailing_hashtags(chunk *ch)
}
}
-void cmark_parser_process_line(cmark_parser *parser, const char *buffer,
- size_t bytes)
+static void
+S_process_line(cmark_parser *parser, const unsigned char *buffer, size_t bytes)
{
cmark_node* last_matched_container;
int offset = 0;
@@ -486,7 +532,7 @@ void cmark_parser_process_line(cmark_parser *parser, const char *buffer,
int indent;
chunk input;
- utf8proc_detab(parser->curline, (unsigned char *)buffer, bytes);
+ utf8proc_detab(parser->curline, buffer, bytes);
// Add a newline to the end if not present:
// TODO this breaks abstraction:
@@ -831,6 +877,12 @@ void cmark_parser_process_line(cmark_parser *parser, const char *buffer,
cmark_node *cmark_parser_finish(cmark_parser *parser)
{
+ if (parser->linebuf->size) {
+ S_process_line(parser, parser->linebuf->ptr,
+ parser->linebuf->size);
+ strbuf_clear(parser->linebuf);
+ }
+
finalize_document(parser);
strbuf_free(parser->curline);
#if CMARK_DEBUG_NODES
diff --git a/src/cmark.h b/src/cmark.h
index 7942645..ff0c58f 100644
--- a/src/cmark.h
+++ b/src/cmark.h
@@ -176,7 +176,7 @@ CMARK_EXPORT
cmark_node *cmark_parser_finish(cmark_parser *parser);
CMARK_EXPORT
-void cmark_parser_process_line(cmark_parser *parser, const char *buffer, size_t bytes);
+void cmark_parser_push(cmark_parser *parser, const char *buffer, size_t len);
CMARK_EXPORT
cmark_node *cmark_parse_document(const char *buffer, size_t len);
diff --git a/src/main.c b/src/main.c
index 3a7ea4c..87cc151 100644
--- a/src/main.c
+++ b/src/main.c
@@ -35,7 +35,7 @@ int main(int argc, char *argv[])
int *files;
char buffer[4096];
cmark_parser *parser;
- size_t offset;
+ size_t bytes;
cmark_node *document;
parser = cmark_parser_new();
@@ -69,9 +69,8 @@ int main(int argc, char *argv[])
}
start_timer();
- while (fgets((char *)buffer, sizeof(buffer), fp)) {
- offset = strlen((char *)buffer);
- cmark_parser_process_line(parser, buffer, offset);
+ while ((bytes = fread(buffer, 1, sizeof(buffer), fp)) > 0) {
+ cmark_parser_push(parser, buffer, bytes);
}
end_timer("processing lines");
@@ -85,9 +84,8 @@ int main(int argc, char *argv[])
exit(0);
*/
- while (fgets((char *)buffer, sizeof(buffer), stdin)) {
- offset = strlen((char *)buffer);
- cmark_parser_process_line(parser, buffer, offset);
+ while ((bytes = fread(buffer, 1, sizeof(buffer), stdin)) > 0) {
+ cmark_parser_push(parser, buffer, bytes);
}
}
diff --git a/src/parser.h b/src/parser.h
index e0ed507..9d65b67 100644
--- a/src/parser.h
+++ b/src/parser.h
@@ -17,6 +17,7 @@ struct cmark_parser {
struct cmark_node* current;
int line_number;
cmark_strbuf *curline;
+ cmark_strbuf *linebuf;
};
#ifdef __cplusplus