From 582674e662d1f8757350c51486a5e0a837195e15 Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Tue, 2 Sep 2014 13:18:04 +0200 Subject: ffffix --- src/print.c | 307 +++++++++++++++++++++++++++++++----------------------------- 1 file changed, 157 insertions(+), 150 deletions(-) (limited to 'src/print.c') diff --git a/src/print.c b/src/print.c index a924870..3ebde16 100644 --- a/src/print.c +++ b/src/print.c @@ -1,168 +1,175 @@ #include #include -#include "bstrlib.h" +#include #include "stmd.h" #include "debug.h" -static bstring format_str(bstring s) +static void print_str(const unsigned char *s, int len) { - int pos = 0; - int len = blength(s); - bstring result = bfromcstr(""); - char c; - bformata(result, "\""); - while (pos < len) { - c = bchar(s, pos); - switch (c) { - case '\n': - bformata(result, "\\n"); - break; - case '"': - bformata(result, "\\\""); - break; - case '\\': - bformata(result, "\\\\"); - break; - default: - bformata(result, "%c", c); - } - pos++; - } - bformata(result, "\""); - return result; + int i; + + if (len < 0) + len = strlen(s); + + putchar('"'); + for (i = 0; i < len; ++i) { + unsigned char c = s[i]; + + switch (c) { + case '\n': + printf("\\n"); + break; + case '"': + printf("\\\""); + break; + case '\\': + printf("\\\\"); + break; + default: + putchar((int)c); + } + } + putchar('"'); } // Functions to pretty-print inline and block lists, for debugging. // Prettyprint an inline list, for debugging. extern void print_blocks(block* b, int indent) { - struct ListData * data; - while(b != NULL) { - // printf("%3d %3d %3d| ", b->start_line, b->start_column, b->end_line); - for (int i=0; i < indent; i++) { - putchar(' '); - } - switch(b->tag) { - case document: - printf("document\n"); - print_blocks(b->children, indent + 2); - break; - case block_quote: - printf("block_quote\n"); - print_blocks(b->children, indent + 2); - break; - case list_item: - data = &(b->attributes.list_data); - printf("list_item\n"); - print_blocks(b->children, indent + 2); - break; - case list: - data = &(b->attributes.list_data); - if (data->list_type == ordered) { - printf("list (type=ordered tight=%s start=%d delim=%s)\n", - (data->tight ? "true" : "false"), - data->start, - (data->delimiter == parens ? "parens" : "period")); - } else { - printf("list (type=bullet tight=%s bullet_char=%c)\n", - (data->tight ? "true" : "false"), - data->bullet_char); - } - print_blocks(b->children, indent + 2); - break; - case atx_header: - printf("atx_header (level=%d)\n", b->attributes.header_level); - print_inlines(b->inline_content, indent + 2); - break; - case setext_header: - printf("setext_header (level=%d)\n", b->attributes.header_level); - print_inlines(b->inline_content, indent + 2); - break; - case paragraph: - printf("paragraph\n"); - print_inlines(b->inline_content, indent + 2); - break; - case hrule: - printf("hrule\n"); - break; - case indented_code: - printf("indented_code %s\n", format_str(b->string_content)->data); - break; - case fenced_code: - printf("fenced_code length=%d info=%s %s\n", - b->attributes.fenced_code_data.fence_length, - format_str(b->attributes.fenced_code_data.info)->data, - format_str(b->string_content)->data); - break; - case html_block: - printf("html_block %s\n", format_str(b->string_content)->data); - break; - case reference_def: - printf("reference_def\n"); - break; - default: - log_warn("block type %d not implemented\n", b->tag); - break; - } - b = b->next; - } + struct ListData *data; + + while(b != NULL) { + // printf("%3d %3d %3d| ", b->start_line, b->start_column, b->end_line); + for (int i=0; i < indent; i++) { + putchar(' '); + } + + switch(b->tag) { + case document: + printf("document\n"); + print_blocks(b->children, indent + 2); + break; + case block_quote: + printf("block_quote\n"); + print_blocks(b->children, indent + 2); + break; + case list_item: + data = &(b->attributes.list_data); + printf("list_item\n"); + print_blocks(b->children, indent + 2); + break; + case list: + data = &(b->attributes.list_data); + if (data->list_type == ordered) { + printf("list (type=ordered tight=%s start=%d delim=%s)\n", + (data->tight ? "true" : "false"), + data->start, + (data->delimiter == parens ? "parens" : "period")); + } else { + printf("list (type=bullet tight=%s bullet_char=%c)\n", + (data->tight ? "true" : "false"), + data->bullet_char); + } + print_blocks(b->children, indent + 2); + break; + case atx_header: + printf("atx_header (level=%d)\n", b->attributes.header_level); + print_inlines(b->inline_content, indent + 2); + break; + case setext_header: + printf("setext_header (level=%d)\n", b->attributes.header_level); + print_inlines(b->inline_content, indent + 2); + break; + case paragraph: + printf("paragraph\n"); + print_inlines(b->inline_content, indent + 2); + break; + case hrule: + printf("hrule\n"); + break; + case indented_code: + printf("indented_code "); + print_str(b->string_content.ptr, -1); + putchar('\n'); + break; + case fenced_code: + printf("fenced_code length=%d info=", + b->attributes.fenced_code_data.fence_length); + print_str(b->attributes.fenced_code_data.info.ptr, -1); + putchar(' '); + print_str(b->string_content.ptr, -1); + putchar('\n'); + break; + case html_block: + printf("html_block "); + print_str(b->string_content.ptr, -1); + putchar('\n'); + break; + case reference_def: + printf("reference_def\n"); + break; + default: + printf("# NOT IMPLEMENTED (%d)\n", b->tag); + break; + } + b = b->next; + } } // Prettyprint an inline list, for debugging. extern void print_inlines(inl* ils, int indent) { - while(ils != NULL) { - /* - // we add 11 extra spaces for the line/column info - for (int i=0; i < 11; i++) { - putchar(' '); - } - putchar('|'); - putchar(' '); - */ - for (int i=0; i < indent; i++) { - putchar(' '); - } - switch(ils->tag) { - case str: - printf("str %s\n", format_str(ils->content.literal)->data); - break; - case linebreak: - printf("linebreak\n"); - break; - case softbreak: - printf("softbreak\n"); - break; - case code: - printf("code %s\n", format_str(ils->content.literal)->data); - break; - case raw_html: - printf("html %s\n", format_str(ils->content.literal)->data); - break; - case entity: - printf("entity %s\n", format_str(ils->content.literal)->data); - break; - case link: - printf("link url=%s title=%s\n", - format_str(ils->content.linkable.url)->data, - format_str(ils->content.linkable.title)->data); - print_inlines(ils->content.linkable.label, indent + 2); - break; - case image: - printf("image url=%s title=%s\n", - format_str(ils->content.linkable.url)->data, - format_str(ils->content.linkable.title)->data); - print_inlines(ils->content.linkable.label, indent + 2); - break; - case strong: - printf("strong\n"); - print_inlines(ils->content.linkable.label, indent + 2); - break; - case emph: - printf("emph\n"); - print_inlines(ils->content.linkable.label, indent + 2); - break; - } - ils = ils->next; - } + while(ils != NULL) { + for (int i=0; i < indent; i++) { + putchar(' '); + } + switch(ils->tag) { + case str: + printf("str "); + print_str(ils->content.literal.data, ils->content.literal.len); + putchar('\n'); + break; + case linebreak: + printf("linebreak\n"); + break; + case softbreak: + printf("softbreak\n"); + break; + case code: + printf("code "); + print_str(ils->content.literal.data, ils->content.literal.len); + putchar('\n'); + break; + case raw_html: + printf("html "); + print_str(ils->content.literal.data, ils->content.literal.len); + putchar('\n'); + break; + case entity: + printf("entity "); + print_str(ils->content.literal.data, ils->content.literal.len); + putchar('\n'); + break; + case link: + case image: + printf("%s url=", ils->tag == link ? "link" : "image"); + print_str(ils->content.linkable.url, -1); + if (ils->content.linkable.title) { + printf(" title="); + print_str(ils->content.linkable.title, -1); + } + putchar('\n'); + print_inlines(ils->content.linkable.label, indent + 2); + break; + case strong: + printf("strong\n"); + print_inlines(ils->content.linkable.label, indent + 2); + break; + case emph: + printf("emph\n"); + print_inlines(ils->content.linkable.label, indent + 2); + break; + } + ils = ils->next; + } } -- cgit v1.2.3 From 24248c0f1a6de6f229890c5c03aeff8738214fee Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Tue, 2 Sep 2014 13:30:13 +0200 Subject: Rename inlines --- src/inlines.c | 50 +++++++++++++++++++++++++------------------------- src/print.c | 22 +++++++++++----------- src/stmd.h | 4 ++-- 3 files changed, 38 insertions(+), 38 deletions(-) (limited to 'src/print.c') diff --git a/src/inlines.c b/src/inlines.c index 82c7219..b9ece0e 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -151,15 +151,15 @@ inline static inl* make_simple(int t) } // Macros for creating various kinds of inlines. -#define make_str(s) make_literal(str, s) -#define make_code(s) make_literal(code, s) -#define make_raw_html(s) make_literal(raw_html, s) -#define make_entity(s) make_literal(entity, s) -#define make_linebreak() make_simple(linebreak) -#define make_softbreak() make_simple(softbreak) -#define make_link(label, url, title) make_linkable(link, label, url, title) -#define make_emph(contents) make_inlines(emph, contents) -#define make_strong(contents) make_inlines(strong, contents) +#define make_str(s) make_literal(INL_STRING, s) +#define make_code(s) make_literal(INL_CODE, s) +#define make_raw_html(s) make_literal(INL_RAW_HTML, s) +#define make_entity(s) make_literal(INL_ENTITY, s) +#define make_linebreak() make_simple(INL_LINEBREAK) +#define make_softbreak() make_simple(INL_SOFTBREAK) +#define make_link(label, url, title) make_linkable(INL_LINK, label, url, title) +#define make_emph(contents) make_inlines(INL_EMPH, contents) +#define make_strong(contents) make_inlines(INL_STRONG, contents) // Free an inline list. extern void free_inlines(inl* e) @@ -167,23 +167,23 @@ extern void free_inlines(inl* e) inl * next; while (e != NULL) { switch (e->tag){ - case str: - case raw_html: - case code: - case entity: + case INL_STRING: + case INL_RAW_HTML: + case INL_CODE: + case INL_ENTITY: chunk_free(&e->content.literal); break; - case linebreak: - case softbreak: + case INL_LINEBREAK: + case INL_SOFTBREAK: break; - case link: - case image: + case INL_LINK: + case INL_IMAGE: free(e->content.linkable.url); free(e->content.linkable.title); free_inlines(e->content.linkable.label); break; - case emph: - case strong: + case INL_EMPH: + case INL_STRONG: free_inlines(e->content.inlines); break; default: @@ -454,7 +454,7 @@ static inl* handle_strong_emph(subject* subj, char c) numdelims = scan_delims(subj, c, &can_open, &can_close); if (numdelims >= 1 && can_close) { subj->pos += 1; - first_head->tag = emph; + first_head->tag = INL_EMPH; chunk_free(&first_head->content.literal); first_head->content.inlines = first_head->next; first_head->next = NULL; @@ -471,7 +471,7 @@ static inl* handle_strong_emph(subject* subj, char c) numdelims = scan_delims(subj, c, &can_open, &can_close); if (numdelims >= 2 && can_close) { subj->pos += 2; - first_head->tag = strong; + first_head->tag = INL_STRONG; chunk_free(&first_head->content.literal); first_head->content.inlines = first_head->next; first_head->next = NULL; @@ -502,10 +502,10 @@ static inl* handle_strong_emph(subject* subj, char c) } subj->pos += numdelims; if (first_close) { - first_head->tag = first_close_delims == 1 ? strong : emph; + first_head->tag = first_close_delims == 1 ? INL_STRONG : INL_EMPH; chunk_free(&first_head->content.literal); first_head->content.inlines = - make_inlines(first_close_delims == 1 ? emph : strong, + make_inlines(first_close_delims == 1 ? INL_EMPH : INL_STRONG, first_head->next); il = first_head->next; @@ -989,8 +989,8 @@ static int parse_inline(subject* subj, inl ** last) advance(subj); if (peek_char(subj) == '[') { new = handle_left_bracket(subj); - if (new != NULL && new->tag == link) { - new->tag = image; + if (new != NULL && new->tag == INL_LINK) { + new->tag = INL_IMAGE; } else { new = append_inlines(make_str(chunk_literal("!")), new); } diff --git a/src/print.c b/src/print.c index 3ebde16..0a87925 100644 --- a/src/print.c +++ b/src/print.c @@ -124,35 +124,35 @@ extern void print_inlines(inl* ils, int indent) putchar(' '); } switch(ils->tag) { - case str: + case INL_STRING: printf("str "); print_str(ils->content.literal.data, ils->content.literal.len); putchar('\n'); break; - case linebreak: + case INL_LINEBREAK: printf("linebreak\n"); break; - case softbreak: + case INL_SOFTBREAK: printf("softbreak\n"); break; - case code: + case INL_CODE: printf("code "); print_str(ils->content.literal.data, ils->content.literal.len); putchar('\n'); break; - case raw_html: + case INL_RAW_HTML: printf("html "); print_str(ils->content.literal.data, ils->content.literal.len); putchar('\n'); break; - case entity: + case INL_ENTITY: printf("entity "); print_str(ils->content.literal.data, ils->content.literal.len); putchar('\n'); break; - case link: - case image: - printf("%s url=", ils->tag == link ? "link" : "image"); + case INL_LINK: + case INL_IMAGE: + printf("%s url=", ils->tag == INL_LINK ? "link" : "image"); print_str(ils->content.linkable.url, -1); if (ils->content.linkable.title) { printf(" title="); @@ -161,11 +161,11 @@ extern void print_inlines(inl* ils, int indent) putchar('\n'); print_inlines(ils->content.linkable.label, indent + 2); break; - case strong: + case INL_STRONG: printf("strong\n"); print_inlines(ils->content.linkable.label, indent + 2); break; - case emph: + case INL_EMPH: printf("emph\n"); print_inlines(ils->content.linkable.label, indent + 2); break; diff --git a/src/stmd.h b/src/stmd.h index dc24235..1e490d6 100644 --- a/src/stmd.h +++ b/src/stmd.h @@ -12,8 +12,8 @@ typedef struct { } chunk; typedef struct Inline { - enum { str, softbreak, linebreak, code, raw_html, entity, - emph, strong, link, image } tag; + enum { INL_STRING, INL_SOFTBREAK, INL_LINEBREAK, INL_CODE, INL_RAW_HTML, INL_ENTITY, + INL_EMPH, INL_STRONG, INL_LINK, INL_IMAGE } tag; union { chunk literal; struct Inline *inlines; -- cgit v1.2.3 From a7314deae649646f1f7ce5ede972641b5b62538c Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Wed, 3 Sep 2014 03:40:23 +0200 Subject: 338/103 --- Makefile | 4 +- src/blocks.c | 173 +++++++++++++++++----------------- src/buffer.c | 26 ++++- src/buffer.h | 2 +- src/html/houdini_href_e.c | 10 +- src/html/houdini_html_e.c | 10 +- src/html/html.c | 4 +- src/inlines.c | 235 ++++++++++++++++++---------------------------- src/print.c | 2 +- src/scanners.h | 28 +++--- src/scanners.re | 85 +++++++---------- src/stmd.h | 16 ++-- 12 files changed, 261 insertions(+), 334 deletions(-) (limited to 'src/print.c') diff --git a/Makefile b/Makefile index d14a928..89ec68c 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ -CFLAGS=-ggdb3 -O0 -Wall -Wextra -std=c99 -Isrc $(OPTFLAGS) -LDFLAGS=-ggdb3 -O0 -Wall -Werror +CFLAGS=-ggdb3 -O0 -Wall -Wextra -Wno-unused-variable -std=c99 -Isrc $(OPTFLAGS) +LDFLAGS=-ggdb3 -O0 -Wall -Wno-unused-variable # -Werror SRCDIR=src DATADIR=data diff --git a/src/blocks.c b/src/blocks.c index 42f20db..94ff986 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -8,6 +8,8 @@ #include "scanners.h" #include "uthash.h" +#define peek_at(i, n) (i)->data[n] + static void incorporate_line(gh_buf *ln, int line_number, block** curptr); static void finalize(block* b, int line_number); @@ -27,7 +29,6 @@ static block* make_block(int tag, int start_line, int start_column) e->top = NULL; e->attributes.refmap = NULL; gh_buf_init(&e->string_content, 32); - e->string_pos = 0; e->inline_content = NULL; e->next = NULL; e->prev = NULL; @@ -80,10 +81,10 @@ static inline bool accepts_lines(int block_type) block_type == fenced_code); } -static void add_line(block* block, gh_buf *ln, int offset) +static void add_line(block* block, chunk *ch, int offset) { assert(block->open); - gh_buf_put(&block->string_content, ln->ptr + offset, ln->size - offset); + gh_buf_put(&block->string_content, ch->data + offset, ch->len - offset); } static void remove_trailing_blank_lines(gh_buf *ln) @@ -104,7 +105,7 @@ static void remove_trailing_blank_lines(gh_buf *ln) i = gh_buf_strchr(ln, '\n', i); if (i >= 0) - gh_buf_truncate(ln, i + 1); + gh_buf_truncate(ln, i); } // Check to see if a block ends with a blank line, descending @@ -162,12 +163,12 @@ static void finalize(block* b, int line_number) switch (b->tag) { case paragraph: pos = 0; - while (gh_buf_at(&b->string_content, b->string_pos) == '[' && - (pos = parse_reference(&b->string_content, b->string_pos, - b->top->attributes.refmap))) { - b->string_pos = pos; + while (gh_buf_at(&b->string_content, 0) == '[' && + (pos = parse_reference(&b->string_content, b->top->attributes.refmap))) { + + gh_buf_drop(&b->string_content, pos); } - if (is_blank(&b->string_content, b->string_pos)) { + if (is_blank(&b->string_content, 0)) { b->tag = reference_def; } break; @@ -179,14 +180,16 @@ static void finalize(block* b, int line_number) case fenced_code: // first line of contents becomes info - firstlinelen = gh_buf_strchr(&b->string_content, '\n', b->string_pos); + firstlinelen = gh_buf_strchr(&b->string_content, '\n', 0); + + gh_buf_init(&b->attributes.fenced_code_data.info, 0); gh_buf_set( &b->attributes.fenced_code_data.info, - b->string_content.ptr + b->string_pos, + b->string_content.ptr, firstlinelen ); - b->string_pos = firstlinelen + 1; + gh_buf_drop(&b->string_content, firstlinelen + 1); gh_buf_trim(&b->attributes.fenced_code_data.info); unescape_buffer(&b->attributes.fenced_code_data.info); @@ -281,7 +284,7 @@ void process_inlines(block* cur, reference** refmap) case paragraph: case atx_header: case setext_header: - cur->inline_content = parse_inlines(&cur->string_content, cur->string_pos, refmap); + cur->inline_content = parse_inlines(&cur->string_content, refmap); // MEM // gh_buf_free(&cur->string_content); break; @@ -300,19 +303,18 @@ void process_inlines(block* cur, reference** refmap) // Attempts to parse a list item marker (bullet or enumerated). // On success, returns length of the marker, and populates // data with the details. On failure, returns 0. -static int parse_list_marker(gh_buf *ln, int pos, - struct ListData ** dataptr) +static int parse_list_marker(chunk *input, int pos, struct ListData ** dataptr) { - char c; + unsigned char c; int startpos; struct ListData * data; startpos = pos; - c = gh_buf_at(ln, pos); + c = peek_at(input, pos); - if ((c == '*' || c == '-' || c == '+') && !scan_hrule(ln, pos)) { + if ((c == '*' || c == '-' || c == '+') && !scan_hrule(input, pos)) { pos++; - if (!isspace(gh_buf_at(ln, pos))) { + if (!isspace(peek_at(input, pos))) { return 0; } data = malloc(sizeof(struct ListData)); @@ -327,14 +329,14 @@ static int parse_list_marker(gh_buf *ln, int pos, int start = 0; do { - start = (10 * start) + (gh_buf_at(ln, pos) - '0'); + start = (10 * start) + (peek_at(input, pos) - '0'); pos++; - } while (isdigit(gh_buf_at(ln, pos))); + } while (isdigit(peek_at(input, pos))); - c = gh_buf_at(ln, pos); + c = peek_at(input, pos); if (c == '.' || c == ')') { pos++; - if (!isspace(gh_buf_at(ln, pos))) { + if (!isspace(peek_at(input, pos))) { return 0; } data = malloc(sizeof(struct ListData)); @@ -449,8 +451,26 @@ extern block *stmd_parse_document(const unsigned char *buffer, size_t len) return finalize_document(document, linenum); } +static void chop_trailing_hashtags(chunk *ch) +{ + int n; + + chunk_rtrim(ch); + n = ch->len - 1; + + // if string ends in #s, remove these: + while (n >= 0 && peek_at(ch, n) == '#') + n--; + + // the last # was escaped, so we include it. + if (n >= 0 && peek_at(ch, n) == '\\') + n++; + + ch->len = n + 1; +} + // Process one line at a time, modifying a block. -static void incorporate_line(gh_buf *ln, int line_number, block** curptr) +static void incorporate_line(gh_buf *line, int line_number, block** curptr) { block* last_matched_container; int offset = 0; @@ -464,6 +484,10 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr) bool blank = false; int first_nonspace; int indent; + chunk input; + + input.data = line->ptr; + input.len = line->size; // container starts at the document root. container = cur->top; @@ -475,21 +499,19 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr) container = container->last_child; first_nonspace = offset; - while (gh_buf_at(ln, first_nonspace) == ' ') { + while (peek_at(&input, first_nonspace) == ' ') { first_nonspace++; } indent = first_nonspace - offset; - blank = gh_buf_at(ln, first_nonspace) == '\n'; + blank = peek_at(&input, first_nonspace) == '\n'; if (container->tag == block_quote) { - - matched = indent <= 3 && gh_buf_at(ln, first_nonspace) == '>'; + matched = indent <= 3 && peek_at(&input, first_nonspace) == '>'; if (matched) { offset = first_nonspace + 1; - if (gh_buf_at(ln, offset) == ' ') { + if (peek_at(&input, offset) == ' ') offset++; - } } else { all_matched = false; } @@ -526,7 +548,7 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr) // skip optional spaces of fence offset i = container->attributes.fenced_code_data.fence_offset; - while (i > 0 && gh_buf_at(ln, offset) == ' ') { + while (i > 0 && peek_at(&input, offset) == ' ') { offset++; i--; } @@ -564,15 +586,13 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr) container->tag != html_block) { first_nonspace = offset; - while (gh_buf_at(ln, first_nonspace) == ' ') { + while (peek_at(&input, first_nonspace) == ' ') first_nonspace++; - } indent = first_nonspace - offset; - blank = gh_buf_at(ln, first_nonspace) == '\n'; + blank = peek_at(&input, first_nonspace) == '\n'; if (indent >= CODE_INDENT) { - if (cur->tag != paragraph && !blank) { offset += CODE_INDENT; container = add_child(container, indented_code, line_number, offset + 1); @@ -580,76 +600,70 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr) break; } - } else if (gh_buf_at(ln, first_nonspace) == '>') { + } else if (peek_at(&input, first_nonspace) == '>') { offset = first_nonspace + 1; // optional following character - if (gh_buf_at(ln, offset) == ' ') { + if (peek_at(&input, offset) == ' ') offset++; - } container = add_child(container, block_quote, line_number, offset + 1); - } else if ((matched = scan_atx_header_start(ln, first_nonspace))) { + } else if ((matched = scan_atx_header_start(&input, first_nonspace))) { offset = first_nonspace + matched; container = add_child(container, atx_header, line_number, offset + 1); - int hashpos = gh_buf_strchr(ln, '#', first_nonspace); - assert(hashpos >= 0); - + int hashpos = chunk_strchr(&input, '#', first_nonspace); int level = 0; - while (gh_buf_at(ln, hashpos) == '#') { + + while (peek_at(&input, hashpos) == '#') { level++; hashpos++; } container->attributes.header_level = level; - } else if ((matched = scan_open_code_fence(ln, first_nonspace))) { + } else if ((matched = scan_open_code_fence(&input, first_nonspace))) { - container = add_child(container, fenced_code, line_number, - first_nonspace + 1); - container->attributes.fenced_code_data.fence_char = gh_buf_at(ln, - first_nonspace); + container = add_child(container, fenced_code, line_number, first_nonspace + 1); + container->attributes.fenced_code_data.fence_char = peek_at(&input, first_nonspace); container->attributes.fenced_code_data.fence_length = matched; - container->attributes.fenced_code_data.fence_offset = - first_nonspace - offset; + container->attributes.fenced_code_data.fence_offset = first_nonspace - offset; offset = first_nonspace + matched; - } else if ((matched = scan_html_block_tag(ln, first_nonspace))) { + } else if ((matched = scan_html_block_tag(&input, first_nonspace))) { - container = add_child(container, html_block, line_number, - first_nonspace + 1); + container = add_child(container, html_block, line_number, first_nonspace + 1); // note, we don't adjust offset because the tag is part of the text } else if (container->tag == paragraph && - (lev = scan_setext_header_line(ln, first_nonspace)) && + (lev = scan_setext_header_line(&input, first_nonspace)) && // check that there is only one line in the paragraph: gh_buf_strrchr(&container->string_content, '\n', gh_buf_len(&container->string_content) - 2) < 0) { container->tag = setext_header; container->attributes.header_level = lev; - offset = gh_buf_len(ln) - 1; + offset = input.len - 1; } else if (!(container->tag == paragraph && !all_matched) && - (matched = scan_hrule(ln, first_nonspace))) { + (matched = scan_hrule(&input, first_nonspace))) { // it's only now that we know the line is not part of a setext header: container = add_child(container, hrule, line_number, first_nonspace + 1); finalize(container, line_number); container = container->parent; - offset = gh_buf_len(ln) - 1; + offset = input.len - 1; - } else if ((matched = parse_list_marker(ln, first_nonspace, &data))) { + } else if ((matched = parse_list_marker(&input, first_nonspace, &data))) { // compute padding: offset = first_nonspace + matched; i = 0; - while (i <= 5 && gh_buf_at(ln, offset + i) == ' ') { + while (i <= 5 && peek_at(&input, offset + i) == ' ') { i++; } // i = number of spaces after marker, up to 5 - if (i >= 5 || i < 1 || gh_buf_at(ln, offset) == '\n') { + if (i >= 5 || i < 1 || peek_at(&input, offset) == '\n') { data->padding = matched + 1; if (i > 0) { offset += 1; @@ -674,6 +688,7 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr) // add the list item container = add_child(container, list_item, line_number, first_nonspace + 1); + /* TODO: static */ container->attributes.list_data = *data; free(data); @@ -691,12 +706,11 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr) // appropriate container. first_nonspace = offset; - while (gh_buf_at(ln, first_nonspace) == ' ') { + while (peek_at(&input, first_nonspace) == ' ') first_nonspace++; - } indent = first_nonspace - offset; - blank = gh_buf_at(ln, first_nonspace) == '\n'; + blank = peek_at(&input, first_nonspace) == '\n'; // block quote lines are never blank as they start with > // and we don't count blanks in fenced code for purposes of tight/loose @@ -721,13 +735,12 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr) cur->tag == paragraph && gh_buf_len(&cur->string_content) > 0) { - add_line(cur, ln, offset); + add_line(cur, &input, offset); } else { // not a lazy continuation // finalize any blocks that were not matched and set cur to container: while (cur != last_matched_container) { - finalize(cur, line_number); cur = cur->parent; assert(cur != NULL); @@ -735,58 +748,46 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr) if (container->tag == indented_code) { - add_line(container, ln, offset); + add_line(container, &input, offset); } else if (container->tag == fenced_code) { matched = (indent <= 3 - && gh_buf_at(ln, first_nonspace) == container->attributes.fenced_code_data.fence_char) - && scan_close_code_fence(ln, first_nonspace, + && peek_at(&input, first_nonspace) == container->attributes.fenced_code_data.fence_char) + && scan_close_code_fence(&input, first_nonspace, container->attributes.fenced_code_data.fence_length); if (matched) { // if closing fence, don't add line to container; instead, close it: finalize(container, line_number); container = container->parent; // back up to parent } else { - add_line(container, ln, offset); + add_line(container, &input, offset); } } else if (container->tag == html_block) { - add_line(container, ln, offset); + add_line(container, &input, offset); } else if (blank) { // ??? do nothing } else if (container->tag == atx_header) { - // chop off trailing ###s...use a scanner? - gh_buf_trim(ln); - int p = gh_buf_len(ln) - 1; - - // if string ends in #s, remove these: - while (gh_buf_at(ln, p) == '#') { - p--; - } - if (gh_buf_at(ln, p) == '\\') { - // the last # was escaped, so we include it. - p++; - } - gh_buf_truncate(ln, p + 1); - add_line(container, ln, first_nonspace); + chop_trailing_hashtags(&input); + add_line(container, &input, first_nonspace); finalize(container, line_number); container = container->parent; } else if (accepts_lines(container->tag)) { - add_line(container, ln, first_nonspace); + add_line(container, &input, first_nonspace); } else if (container->tag != hrule && container->tag != setext_header) { // create paragraph container for line container = add_child(container, paragraph, line_number, first_nonspace + 1); - add_line(container, ln, first_nonspace); + add_line(container, &input, first_nonspace); } else { assert(false); diff --git a/src/buffer.c b/src/buffer.c index cfc6a7e..dc4a405 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -95,7 +95,7 @@ void gh_buf_clear(gh_buf *buf) int gh_buf_set(gh_buf *buf, const unsigned char *data, int len) { - if (len == 0 || data == NULL) { + if (len <= 0 || data == NULL) { gh_buf_clear(buf); } else { if (data != buf->ptr) { @@ -125,6 +125,9 @@ int gh_buf_putc(gh_buf *buf, int c) int gh_buf_put(gh_buf *buf, const unsigned char *data, int len) { + if (len <= 0) + return 0; + ENSURE_SIZE(buf, buf->size + len + 1); memmove(buf->ptr + buf->size, data, len); buf->size += len; @@ -272,15 +275,28 @@ void gh_buf_truncate(gh_buf *buf, int len) } } +void gh_buf_drop(gh_buf *buf, int n) +{ + if (n > 0) { + buf->size = buf->size - n; + if (buf->size) + memmove(buf->ptr, buf->ptr + n, buf->size); + + buf->ptr[buf->size] = '\0'; + } +} + void gh_buf_trim(gh_buf *buf) { - /* TODO: leading whitespace? */ - /* + int i = 0; + + if (!buf->size) + return; + while (i < buf->size && isspace(buf->ptr[i])) i++; - gh_buf_truncate(buf, i); - */ + gh_buf_drop(buf, i); /* rtrim */ while (buf->size > 0) { diff --git a/src/buffer.h b/src/buffer.h index 422ef02..0d5143e 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -105,8 +105,8 @@ extern void gh_buf_clear(gh_buf *buf); int gh_buf_strchr(const gh_buf *buf, int c, int pos); int gh_buf_strrchr(const gh_buf *buf, int c, int pos); +void gh_buf_drop(gh_buf *buf, int n); void gh_buf_truncate(gh_buf *buf, int len); -void gh_buf_ltruncate(gh_buf *buf, int len); void gh_buf_trim(gh_buf *buf); #endif diff --git a/src/html/houdini_href_e.c b/src/html/houdini_href_e.c index 59fe850..b2a7d79 100644 --- a/src/html/houdini_href_e.c +++ b/src/html/houdini_href_e.c @@ -62,16 +62,8 @@ houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size) while (i < size && HREF_SAFE[src[i]] != 0) i++; - if (likely(i > org)) { - if (unlikely(org == 0)) { - if (i >= size) - return 0; - - gh_buf_grow(ob, HOUDINI_ESCAPED_SIZE(size)); - } - + if (likely(i > org)) gh_buf_put(ob, src + org, i - org); - } /* escaping */ if (i >= size) diff --git a/src/html/houdini_html_e.c b/src/html/houdini_html_e.c index 316c5ce..5cdd3dd 100644 --- a/src/html/houdini_html_e.c +++ b/src/html/houdini_html_e.c @@ -54,16 +54,8 @@ houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure) while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0) i++; - if (i > org) { - if (unlikely(org == 0)) { - if (i >= size) - return 0; - - gh_buf_grow(ob, HOUDINI_ESCAPED_SIZE(size)); - } - + if (i > org) gh_buf_put(ob, src + org, i - org); - } /* escaping */ if (unlikely(i >= size)) diff --git a/src/html/html.c b/src/html/html.c index 2f160ca..27ffe58 100644 --- a/src/html/html.c +++ b/src/html/html.c @@ -68,7 +68,7 @@ void blocks_to_html(gh_buf *html, block *b, bool tight) cr(html); gh_buf_puts(html, "
  • "); blocks_to_html(html, b->children, tight); - gh_buf_trim(html); + gh_buf_trim(html); /* TODO: rtrim */ gh_buf_puts(html, "
  • "); cr(html); break; @@ -106,7 +106,7 @@ void blocks_to_html(gh_buf *html, block *b, bool tight) cr(html); gh_buf_puts(html, "
    ");
     				escape_html(html, b->string_content.ptr, b->string_content.size);
    -				gh_buf_puts(html, "
    "); + gh_buf_puts(html, ""); cr(html); break; diff --git a/src/inlines.c b/src/inlines.c index 7b48ad9..ef27a24 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -9,10 +9,10 @@ #include "scanners.h" typedef struct Subject { - const gh_buf *buffer; - int pos; - reference** reference_map; - int label_nestlevel; + chunk input; + int pos; + int label_nestlevel; + reference** reference_map; } subject; reference* lookup_reference(reference** refmap, chunk *label); @@ -27,12 +27,16 @@ inline static void chunk_trim(chunk *c); inline static chunk chunk_literal(const char *data); inline static chunk chunk_buf_detach(gh_buf *buf); -inline static chunk chunk_buf(const gh_buf *buf, int pos, int len); +inline static chunk chunk_dup(const chunk *ch, int pos, int len); static inl *parse_chunk_inlines(chunk *chunk, reference** refmap); static inl *parse_inlines_while(subject* subj, int (*f)(subject*)); static int parse_inline(subject* subj, inl ** last); +static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap); +static void subject_from_buf(subject *e, gh_buf *buffer, reference** refmap); +static int subject_find_special_char(subject *subj); + extern void free_reference(reference *ref) { free(ref->label); free(ref->url); @@ -101,10 +105,12 @@ extern reference* make_reference(chunk *label, chunk *url, chunk *title) extern void add_reference(reference** refmap, reference* ref) { reference * t = NULL; - HASH_FIND(hh, *refmap, (char*)ref->label, (unsigned)strlen(ref->label), t); + const char *label = (const char *)ref->label; + + HASH_FIND(hh, *refmap, label, strlen(label), t); if (t == NULL) { - HASH_ADD_KEYPTR(hh, *refmap, (char*)ref->label, (unsigned)strlen(ref->label), ref); + HASH_ADD_KEYPTR(hh, *refmap, label, strlen(label), ref); } else { free_reference(ref); // we free this now since it won't be in the refmap } @@ -210,87 +216,49 @@ inline static inl* append_inlines(inl* a, inl* b) return a; } -// Make a 'subject' from an input string. -static void init_subject(subject *e, gh_buf *buffer, int input_pos, reference** refmap) +static void subject_from_buf(subject *e, gh_buf *buffer, reference** refmap) { - e->buffer = buffer; - e->pos = input_pos; + e->input.data = buffer->ptr; + e->input.len = buffer->size; + e->input.alloc = 0; + e->pos = 0; e->label_nestlevel = 0; e->reference_map = refmap; -} - -inline static int isbacktick(int c) -{ - return (c == '`'); -} - -inline static void chunk_free(chunk *c) -{ - if (c->alloc) - free((char *)c->data); - - c->data = NULL; - c->alloc = 0; - c->len = 0; -} - -inline static void chunk_trim(chunk *c) -{ - while (c->len && isspace(c->data[0])) { - c->data++; - c->len--; - } - - while (c->len > 0) { - if (!isspace(c->data[c->len - 1])) - break; - c->len--; - } + chunk_rtrim(&e->input); } -inline static unsigned char *chunk_to_cstr(chunk *c) +static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap) { - unsigned char *str; - - str = malloc(c->len + 1); - memcpy(str, c->data, c->len); - str[c->len] = 0; + e->input.data = chunk->data; + e->input.len = chunk->len; + e->input.alloc = 0; + e->pos = 0; + e->label_nestlevel = 0; + e->reference_map = refmap; - return str; + chunk_rtrim(&e->input); } -inline static chunk chunk_literal(const char *data) +inline static int isbacktick(int c) { - chunk c = {data, data ? strlen(data) : 0, 0}; - return c; + return (c == '`'); } -inline static chunk chunk_buf(const gh_buf *buf, int pos, int len) +static inline unsigned char peek_char(subject *subj) { - chunk c = {buf->ptr + pos, len, 0}; - return c; + return (subj->pos < subj->input.len) ? subj->input.data[subj->pos] : 0; } -inline static chunk chunk_buf_detach(gh_buf *buf) +static inline unsigned char peek_at(subject *subj, int pos) { - chunk c; - - c.len = buf->size; - c.data = gh_buf_detach(buf); - c.alloc = 1; - - return c; + return subj->input.data[pos]; } -// Return the next character in the subject, without advancing. -// Return 0 if at the end of the subject. -#define peek_char(subj) gh_buf_at((subj)->buffer, (subj)->pos) - // Return true if there are more characters in the subject. inline static int is_eof(subject* subj) { - return (subj->pos >= gh_buf_len(subj->buffer)); + return (subj->pos >= subj->input.len); } // Advance the subject. Doesn't check for eof. @@ -308,7 +276,7 @@ inline static chunk take_while(subject* subj, int (*f)(int)) len++; } - return chunk_buf(subj->buffer, startpos, len); + return chunk_dup(&subj->input, startpos, len); } // Try to process a backtick code span that began with a @@ -388,7 +356,7 @@ static inl* handle_backticks(subject *subj) } else { gh_buf buf = GH_BUF_INIT; - gh_buf_set(&buf, subj->buffer->ptr + startpos, endpos - startpos - openticks.len); + gh_buf_set(&buf, subj->input.data + startpos, endpos - startpos - openticks.len); gh_buf_trim(&buf); normalize_whitespace(&buf); @@ -404,7 +372,7 @@ static int scan_delims(subject* subj, char c, bool * can_open, bool * can_close) char char_before, char_after; int startpos = subj->pos; - char_before = subj->pos == 0 ? '\n' : gh_buf_at(subj->buffer, subj->pos - 1); + char_before = subj->pos == 0 ? '\n' : peek_at(subj, subj->pos - 1); while (peek_char(subj) == c) { numdelims++; advance(subj); @@ -439,7 +407,7 @@ static inl* handle_strong_emph(subject* subj, char c) numdelims = scan_delims(subj, c, &can_open, &can_close); subj->pos += numdelims; - new = make_str(chunk_buf(subj->buffer, subj->pos - numdelims, numdelims)); + new = make_str(chunk_dup(&subj->input, subj->pos - numdelims, numdelims)); *last = new; first_head = new; result = new; @@ -488,7 +456,7 @@ static inl* handle_strong_emph(subject* subj, char c) numdelims = scan_delims(subj, c, &can_open, &can_close); if (can_close && numdelims >= 1 && numdelims <= 3 && numdelims != first_close_delims) { - new = make_str(chunk_buf(subj->buffer, subj->pos, numdelims)); + new = make_str(chunk_dup(&subj->input, subj->pos, numdelims)); append_inlines(*last, new); *last = new; if (first_close_delims == 1 && numdelims > 2) { @@ -554,7 +522,7 @@ static inl* handle_backslash(subject *subj) unsigned char nextchar = peek_char(subj); if (ispunct(nextchar)) { // only ascii symbols and newline can be escaped advance(subj); - return make_str(chunk_buf(subj->buffer, subj->pos - 1, 1)); + return make_str(chunk_dup(&subj->input, subj->pos - 1, 1)); } else if (nextchar == '\n') { advance(subj); return make_linebreak(); @@ -569,9 +537,9 @@ static inl* handle_entity(subject* subj) { int match; inl *result; - match = scan_entity(subj->buffer, subj->pos); + match = scan_entity(&subj->input, subj->pos); if (match) { - result = make_entity(chunk_buf(subj->buffer, subj->pos, match)); + result = make_entity(chunk_dup(&subj->input, subj->pos, match)); subj->pos += match; } else { advance(subj); @@ -584,15 +552,13 @@ static inl* handle_entity(subject* subj) // Returns an inline sequence consisting of str and entity elements. static inl *make_str_with_entities(chunk *content) { - inl * result = NULL; - inl * new; + inl *result = NULL; + inl *new; int searchpos; char c; subject subj; - gh_buf content_buf = GH_BUF_INIT; - gh_buf_set(&content_buf, content->data, content->len); - init_subject(&subj, &content_buf, 0, NULL); + subject_from_chunk(&subj, content, NULL); while ((c = peek_char(&subj))) { switch (c) { @@ -600,18 +566,13 @@ static inl *make_str_with_entities(chunk *content) new = handle_entity(&subj); break; default: - searchpos = gh_buf_strchr(subj.buffer, '&', subj.pos); - if (searchpos < 0) { - searchpos = gh_buf_len(subj.buffer); - } - - new = make_str(chunk_buf(subj.buffer, subj.pos, searchpos - subj.pos)); + searchpos = chunk_strchr(&subj.input, '&', subj.pos); + new = make_str(chunk_dup(&subj.input, subj.pos, searchpos - subj.pos)); subj.pos = searchpos; } result = append_inlines(result, new); } - gh_buf_free(&content_buf); return result; } @@ -678,9 +639,9 @@ static inl* handle_pointy_brace(subject* subj) advance(subj); // advance past first < // first try to match a URL autolink - matchlen = scan_autolink_uri(subj->buffer, subj->pos); + matchlen = scan_autolink_uri(&subj->input, subj->pos); if (matchlen > 0) { - contents = chunk_buf(subj->buffer, subj->pos, matchlen - 1); + contents = chunk_dup(&subj->input, subj->pos, matchlen - 1); subj->pos += matchlen; return make_link( @@ -691,11 +652,11 @@ static inl* handle_pointy_brace(subject* subj) } // next try to match an email autolink - matchlen = scan_autolink_email(subj->buffer, subj->pos); + matchlen = scan_autolink_email(&subj->input, subj->pos); if (matchlen > 0) { gh_buf mail_url = GH_BUF_INIT; - contents = chunk_buf(subj->buffer, subj->pos, matchlen - 1); + contents = chunk_dup(&subj->input, subj->pos, matchlen - 1); subj->pos += matchlen; gh_buf_puts(&mail_url, "mailto:"); @@ -709,9 +670,9 @@ static inl* handle_pointy_brace(subject* subj) } // finally, try to match an html tag - matchlen = scan_html_tag(subj->buffer, subj->pos); + matchlen = scan_html_tag(&subj->input, subj->pos); if (matchlen > 0) { - contents = chunk_buf(subj->buffer, subj->pos - 1, matchlen + 1); + contents = chunk_dup(&subj->input, subj->pos - 1, matchlen + 1); subj->pos += matchlen; return make_raw_html(contents); } @@ -776,12 +737,7 @@ static int link_label(subject* subj, chunk *raw_label) } } if (c == ']') { - *raw_label = chunk_buf( - subj->buffer, - startpos + 1, - subj->pos - (startpos + 1) - ); - + *raw_label = chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1)); subj->label_nestlevel = 0; advance(subj); // advance past ] return 1; @@ -813,25 +769,25 @@ static inl* handle_left_bracket(subject* subj) if (found_label) { if (peek_char(subj) == '(' && - ((sps = scan_spacechars(subj->buffer, subj->pos + 1)) > -1) && - ((n = scan_link_url(subj->buffer, subj->pos + 1 + sps)) > -1)) { + ((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) && + ((n = scan_link_url(&subj->input, subj->pos + 1 + sps)) > -1)) { // try to parse an explicit link: starturl = subj->pos + 1 + sps; // after ( endurl = starturl + n; - starttitle = endurl + scan_spacechars(subj->buffer, endurl); + starttitle = endurl + scan_spacechars(&subj->input, endurl); // ensure there are spaces btw url and title endtitle = (starttitle == endurl) ? starttitle : - starttitle + scan_link_title(subj->buffer, starttitle); + starttitle + scan_link_title(&subj->input, starttitle); - endall = endtitle + scan_spacechars(subj->buffer, endtitle); + endall = endtitle + scan_spacechars(&subj->input, endtitle); - if (gh_buf_at(subj->buffer, endall) == ')') { + if (peek_at(subj, endall) == ')') { subj->pos = endall + 1; - url = chunk_buf(subj->buffer, starturl, endurl - starturl); - title = chunk_buf(subj->buffer, starttitle, endtitle - starttitle); + url = chunk_dup(&subj->input, starturl, endurl - starturl); + title = chunk_dup(&subj->input, starttitle, endtitle - starttitle); lab = parse_chunk_inlines(&rawlabel, NULL); return make_link(lab, url, title); @@ -850,7 +806,7 @@ static inl* handle_left_bracket(subject* subj) // Check for reference link. // First, see if there's another label: - subj->pos = subj->pos + scan_spacechars(subj->buffer, endlabel); + subj->pos = subj->pos + scan_spacechars(&subj->input, endlabel); reflabel = rawlabel; // if followed by a nonempty link label, we change reflabel to it: @@ -892,8 +848,8 @@ static inl* handle_newline(subject *subj) advance(subj); } if (nlpos > 1 && - gh_buf_at(subj->buffer, nlpos - 1) == ' ' && - gh_buf_at(subj->buffer, nlpos - 2) == ' ') { + peek_at(subj, nlpos - 1) == ' ' && + peek_at(subj, nlpos - 2) == ' ') { return make_linebreak(); } else { return make_softbreak(); @@ -917,30 +873,22 @@ extern inl* parse_inlines_while(subject* subj, int (*f)(subject*)) inl *parse_chunk_inlines(chunk *chunk, reference** refmap) { - inl *result; subject subj; - gh_buf full_chunk = GH_BUF_INIT; - - gh_buf_set(&full_chunk, chunk->data, chunk->len); - init_subject(&subj, &full_chunk, 0, refmap); - result = parse_inlines_while(&subj, not_eof); - - gh_buf_free(&full_chunk); - return result; + subject_from_chunk(&subj, chunk, refmap); + return parse_inlines_while(&subj, not_eof); } -static int find_special_char(subject *subj) +static int subject_find_special_char(subject *subj) { int n = subj->pos + 1; - int size = (int)gh_buf_len(subj->buffer); - while (n < size) { - if (strchr("\n\\`&_*[]buffer, n))) + while (n < subj->input.len) { + if (strchr("\n\\`&_*[]input.data[n])) return n; n++; } - return -1; + return subj->input.len; } // Parse an inline, advancing subject, and add it to last element. @@ -973,11 +921,13 @@ static int parse_inline(subject* subj, inl ** last) new = handle_pointy_brace(subj); break; case '_': - if (subj->pos > 0 && (isalnum(gh_buf_at(subj->buffer, subj->pos - 1)) || - gh_buf_at(subj->buffer, subj->pos - 1) == '_')) { - new = make_str(chunk_literal("_")); - advance(subj); - break; + if (subj->pos > 0) { + unsigned char prev = peek_at(subj, subj->pos - 1); + if (isalnum(prev) || prev == '_') { + new = make_str(chunk_literal("_")); + advance(subj); + break; + } } new = handle_strong_emph(subj, '_'); @@ -1002,18 +952,13 @@ static int parse_inline(subject* subj, inl ** last) } break; default: - text_literal: - endpos = find_special_char(subj); - if (endpos < 0) { - endpos = gh_buf_len(subj->buffer); - } - - contents = chunk_buf(subj->buffer, subj->pos, endpos - subj->pos); + endpos = subject_find_special_char(subj); + contents = chunk_dup(&subj->input, subj->pos, endpos - subj->pos); subj->pos = endpos; // if we're at a newline, strip trailing spaces. if (peek_char(subj) == '\n') { - chunk_trim(&contents); + chunk_rtrim(&contents); } new = make_str(contents); @@ -1026,10 +971,10 @@ static int parse_inline(subject* subj, inl ** last) return 1; } -extern inl* parse_inlines(gh_buf *input, int input_pos, reference** refmap) +extern inl* parse_inlines(gh_buf *input, reference** refmap) { subject subj; - init_subject(&subj, input, input_pos, refmap); + subject_from_buf(&subj, input, refmap); return parse_inlines_while(&subj, not_eof); } @@ -1048,7 +993,7 @@ void spnl(subject* subj) // Modify refmap if a reference is encountered. // Return 0 if no reference found, otherwise position of subject // after reference is parsed. -extern int parse_reference(gh_buf *input, int input_pos, reference** refmap) +extern int parse_reference(gh_buf *input, reference** refmap) { subject subj; @@ -1058,9 +1003,9 @@ extern int parse_reference(gh_buf *input, int input_pos, reference** refmap) int matchlen = 0; int beforetitle; - reference * new = NULL; + reference *new = NULL; - init_subject(&subj, input, input_pos, NULL); + subject_from_buf(&subj, input, NULL); // parse label: if (!link_label(&subj, &lab)) @@ -1075,9 +1020,9 @@ extern int parse_reference(gh_buf *input, int input_pos, reference** refmap) // parse link url: spnl(&subj); - matchlen = scan_link_url(subj.buffer, subj.pos); + matchlen = scan_link_url(&subj.input, subj.pos); if (matchlen) { - url = chunk_buf(subj.buffer, subj.pos, matchlen); + url = chunk_dup(&subj.input, subj.pos, matchlen); subj.pos += matchlen; } else { return 0; @@ -1086,9 +1031,9 @@ extern int parse_reference(gh_buf *input, int input_pos, reference** refmap) // parse optional link_title beforetitle = subj.pos; spnl(&subj); - matchlen = scan_link_title(subj.buffer, subj.pos); + matchlen = scan_link_title(&subj.input, subj.pos); if (matchlen) { - title = chunk_buf(subj.buffer, subj.pos, matchlen); + title = chunk_dup(&subj.input, subj.pos, matchlen); subj.pos += matchlen; } else { subj.pos = beforetitle; diff --git a/src/print.c b/src/print.c index 0a87925..c262995 100644 --- a/src/print.c +++ b/src/print.c @@ -9,7 +9,7 @@ static void print_str(const unsigned char *s, int len) int i; if (len < 0) - len = strlen(s); + len = strlen((char *)s); putchar('"'); for (i = 0; i < len; ++i) { diff --git a/src/scanners.h b/src/scanners.h index b6e586b..f96c42d 100644 --- a/src/scanners.h +++ b/src/scanners.h @@ -1,15 +1,15 @@ -#include "buffer.h" +#include "stmd.h" -int scan_autolink_uri(const gh_buf *s, int pos); -int scan_autolink_email(const gh_buf *s, int pos); -int scan_html_tag(const gh_buf *s, int pos); -int scan_html_block_tag(const gh_buf *s, int pos); -int scan_link_url(const gh_buf *s, int pos); -int scan_link_title(const gh_buf *s, int pos); -int scan_spacechars(const gh_buf *s, int pos); -int scan_atx_header_start(const gh_buf *s, int pos); -int scan_setext_header_line(const gh_buf *s, int pos); -int scan_hrule(const gh_buf *s, int pos); -int scan_open_code_fence(const gh_buf *s, int pos); -int scan_close_code_fence(const gh_buf *s, int pos, int len); -int scan_entity(const gh_buf *s, int pos); +int scan_autolink_uri(chunk *c, int offset); +int scan_autolink_email(chunk *c, int offset); +int scan_html_tag(chunk *c, int offset); +int scan_html_block_tag(chunk *c, int offset); +int scan_link_url(chunk *c, int offset); +int scan_link_title(chunk *c, int offset); +int scan_spacechars(chunk *c, int offset); +int scan_atx_header_start(chunk *c, int offset); +int scan_setext_header_line(chunk *c, int offset); +int scan_hrule(chunk *c, int offset); +int scan_open_code_fence(chunk *c, int offset); +int scan_close_code_fence(chunk *c, int offset, int len); +int scan_entity(chunk *c, int offset); diff --git a/src/scanners.re b/src/scanners.re index 7323ef9..5ac7c15 100644 --- a/src/scanners.re +++ b/src/scanners.re @@ -1,8 +1,15 @@ -#include "buffer.h" +#include "scanners.h" + +#define SCAN_DATA \ + const unsigned char *marker = NULL; \ + const unsigned char *p = c->data + offset; \ + const unsigned char *start = p; \ + const unsigned char *end = c->data + c->len /*!re2c re2c:define:YYCTYPE = "unsigned char"; re2c:define:YYCURSOR = p; + re2c:define:YYLIMIT = end; re2c:define:YYMARKER = marker; re2c:define:YYCTXMARKER = marker; re2c:yyfill:enable = 0; @@ -55,11 +62,9 @@ */ // Try to match URI autolink after first <, returning number of chars matched. -extern int scan_autolink_uri(const gh_buf *s, int pos) +extern int scan_autolink_uri(chunk *c, int offset) { - unsigned char * marker = NULL; - unsigned char * p = &(s->ptr[pos]); - unsigned char * start = p; + SCAN_DATA; /*!re2c scheme [:]([^\x00-\x20<>\\]|escaped_char)*[>] { return (p - start); } .? { return 0; } @@ -67,11 +72,9 @@ extern int scan_autolink_uri(const gh_buf *s, int pos) } // Try to match email autolink after first <, returning num of chars matched. -extern int scan_autolink_email(const gh_buf *s, int pos) +extern int scan_autolink_email(chunk *c, int offset) { - unsigned char * marker = NULL; - unsigned char * p = &(s->ptr[pos]); - unsigned char * start = p; + SCAN_DATA; /*!re2c [a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+ [@] @@ -83,11 +86,9 @@ extern int scan_autolink_email(const gh_buf *s, int pos) } // Try to match an HTML tag after first <, returning num of chars matched. -extern int scan_html_tag(const gh_buf *s, int pos) +extern int scan_html_tag(chunk *c, int offset) { - unsigned char * marker = NULL; - unsigned char * p = &(s->ptr[pos]); - unsigned char * start = p; + SCAN_DATA; /*!re2c htmltag { return (p - start); } .? { return 0; } @@ -96,11 +97,9 @@ extern int scan_html_tag(const gh_buf *s, int pos) // Try to match an HTML block tag including first <, // returning num of chars matched. -extern int scan_html_block_tag(const gh_buf *s, int pos) +extern int scan_html_block_tag(chunk *c, int offset) { - unsigned char * marker = NULL; - unsigned char * p = &(s->ptr[pos]); - unsigned char * start = p; + SCAN_DATA; /*!re2c [<] [/] blocktagname (spacechar | [>]) { return (p - start); } [<] blocktagname (spacechar | [/>]) { return (p - start); } @@ -113,11 +112,9 @@ extern int scan_html_block_tag(const gh_buf *s, int pos) // This may optionally be contained in <..>; otherwise // whitespace and unbalanced right parentheses aren't allowed. // Newlines aren't ever allowed. -extern int scan_link_url(const gh_buf *s, int pos) +extern int scan_link_url(chunk *c, int offset) { - unsigned char * marker = NULL; - unsigned char * p = &(s->ptr[pos]); - unsigned char * start = p; + SCAN_DATA; /*!re2c [ \n]* [<] ([^<>\n\\\x00] | escaped_char | [\\])* [>] { return (p - start); } [ \n]* (reg_char+ | escaped_char | in_parens_nosp)* { return (p - start); } @@ -128,11 +125,9 @@ extern int scan_link_url(const gh_buf *s, int pos) // Try to match a link title (in single quotes, in double quotes, or // in parentheses), returning number of chars matched. Allow one // level of internal nesting (quotes within quotes). -extern int scan_link_title(const gh_buf *s, int pos) +extern int scan_link_title(chunk *c, int offset) { - unsigned char * marker = NULL; - unsigned char * p = &(s->ptr[pos]); - unsigned char * start = p; + SCAN_DATA; /*!re2c ["] (escaped_char|[^"\x00])* ["] { return (p - start); } ['] (escaped_char|[^'\x00])* ['] { return (p - start); } @@ -142,10 +137,9 @@ extern int scan_link_title(const gh_buf *s, int pos) } // Match space characters, including newlines. -extern int scan_spacechars(const gh_buf *s, int pos) +extern int scan_spacechars(chunk *c, int offset) { - unsigned char * p = &(s->ptr[pos]); - unsigned char * start = p; + SCAN_DATA; /*!re2c [ \t\n]* { return (p - start); } . { return 0; } @@ -153,11 +147,9 @@ extern int scan_spacechars(const gh_buf *s, int pos) } // Match ATX header start. -extern int scan_atx_header_start(const gh_buf *s, int pos) +extern int scan_atx_header_start(chunk *c, int offset) { - unsigned char * marker = NULL; - unsigned char * p = &(s->ptr[pos]); - unsigned char * start = p; + SCAN_DATA; /*!re2c [#]{1,6} ([ ]+|[\n]) { return (p - start); } .? { return 0; } @@ -166,10 +158,9 @@ extern int scan_atx_header_start(const gh_buf *s, int pos) // Match sexext header line. Return 1 for level-1 header, // 2 for level-2, 0 for no match. -extern int scan_setext_header_line(const gh_buf *s, int pos) +extern int scan_setext_header_line(chunk *c, int offset) { - unsigned char * marker = NULL; - unsigned char * p = &(s->ptr[pos]); + SCAN_DATA; /*!re2c [=]+ [ ]* [\n] { return 1; } [-]+ [ ]* [\n] { return 2; } @@ -180,11 +171,9 @@ extern int scan_setext_header_line(const gh_buf *s, int pos) // Scan a horizontal rule line: "...three or more hyphens, asterisks, // or underscores on a line by themselves. If you wish, you may use // spaces between the hyphens or asterisks." -extern int scan_hrule(const gh_buf *s, int pos) +extern int scan_hrule(chunk *c, int offset) { - unsigned char * marker = NULL; - unsigned char * p = &(s->ptr[pos]); - unsigned char * start = p; + SCAN_DATA; /*!re2c ([*][ ]*){3,} [ \t]* [\n] { return (p - start); } ([_][ ]*){3,} [ \t]* [\n] { return (p - start); } @@ -194,11 +183,9 @@ extern int scan_hrule(const gh_buf *s, int pos) } // Scan an opening code fence. -extern int scan_open_code_fence(const gh_buf *s, int pos) +extern int scan_open_code_fence(chunk *c, int offset) { - unsigned char * marker = NULL; - unsigned char * p = &(s->ptr[pos]); - unsigned char * start = p; + SCAN_DATA; /*!re2c [`]{3,} / [^`\n\x00]*[\n] { return (p - start); } [~]{3,} / [^~\n\x00]*[\n] { return (p - start); } @@ -207,11 +194,9 @@ extern int scan_open_code_fence(const gh_buf *s, int pos) } // Scan a closing code fence with length at least len. -extern int scan_close_code_fence(const gh_buf *s, int pos, int len) +extern int scan_close_code_fence(chunk *c, int offset, int len) { - unsigned char * marker = NULL; - unsigned char * p = &(s->ptr[pos]); - unsigned char * start = p; + SCAN_DATA; /*!re2c ([`]{3,} | [~]{3,}) / spacechar* [\n] { if (p - start > len) { @@ -225,11 +210,9 @@ extern int scan_close_code_fence(const gh_buf *s, int pos, int len) // Scans an entity. // Returns number of chars matched. -extern int scan_entity(const gh_buf *s, int pos) +extern int scan_entity(chunk *c, int offset) { - unsigned char * marker = NULL; - unsigned char * p = &(s->ptr[pos]); - unsigned char * start = p; + SCAN_DATA; /*!re2c [&] ([#] ([Xx][A-Fa-f0-9]{1,8}|[0-9]{1,8}) |[A-Za-z][A-Za-z0-9]{1,31} ) [;] { return (p - start); } diff --git a/src/stmd.h b/src/stmd.h index 3e284bd..4a3c399 100644 --- a/src/stmd.h +++ b/src/stmd.h @@ -1,17 +1,15 @@ +#ifndef _STDMD_H_ +#define _STDMD_H_ + #include #include #include "buffer.h" +#include "chunk.h" #include "uthash.h" #define VERSION "0.1" #define CODE_INDENT 4 -typedef struct { - const unsigned char *data; - int len; - int alloc; -} chunk; - typedef struct Inline { enum { INL_STRING, INL_SOFTBREAK, INL_LINEBREAK, INL_CODE, INL_RAW_HTML, INL_ENTITY, INL_EMPH, INL_STRONG, INL_LINK, INL_IMAGE } tag; @@ -79,7 +77,6 @@ typedef struct Block { struct Block* parent; struct Block* top; gh_buf string_content; - int string_pos; inl* inline_content; union { struct ListData list_data; @@ -91,10 +88,10 @@ typedef struct Block { struct Block * prev; } block; -inl* parse_inlines(gh_buf *input, int input_pos, reference** refmap); +inl* parse_inlines(gh_buf *input, reference** refmap); void free_inlines(inl* e); -int parse_reference(gh_buf *input, int input_pos, reference** refmap); +int parse_reference(gh_buf *input, reference** refmap); void free_reference(reference *ref); void free_reference_map(reference **refmap); @@ -117,3 +114,4 @@ void inlines_to_html(gh_buf *html, inl *b); void utf8proc_case_fold(gh_buf *dest, const unsigned char *str, int len); +#endif -- cgit v1.2.3 From d8f44f1e4f0bd944ab43e6434a1579d670ed66cf Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Thu, 4 Sep 2014 17:49:13 +0200 Subject: 433/8 --- spec.txt | 2 +- src/html/html.c | 6 ++++-- src/inlines.c | 3 +++ src/print.c | 5 ++++- 4 files changed, 12 insertions(+), 4 deletions(-) (limited to 'src/print.c') diff --git a/spec.txt b/spec.txt index d7e70f5..cfda2a3 100644 --- a/spec.txt +++ b/spec.txt @@ -3946,7 +3946,7 @@ But this is a link: . ` . -

    http://foo.bar.`baz`

    +

    http://foo.bar.`baz`

    . And this is an HTML tag: diff --git a/src/html/html.c b/src/html/html.c index 913a602..41b8fda 100644 --- a/src/html/html.c +++ b/src/html/html.c @@ -174,7 +174,8 @@ void inlines_to_html(gh_buf *html, inl* ils) case INL_LINK: gh_buf_puts(html, "content.linkable.url, -1); + if (ils->content.linkable.url) + escape_href(html, ils->content.linkable.url, -1); if (ils->content.linkable.title) { gh_buf_puts(html, "\" title=\""); @@ -188,7 +189,8 @@ void inlines_to_html(gh_buf *html, inl* ils) case INL_IMAGE: gh_buf_puts(html, "content.linkable.url, -1); + if (ils->content.linkable.url) + escape_href(html, ils->content.linkable.url, -1); inlines_to_html(&scrap, ils->content.inlines); gh_buf_puts(html, "\" alt=\""); diff --git a/src/inlines.c b/src/inlines.c index 599be84..8e2e683 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -591,6 +591,9 @@ static unsigned char *clean_url(chunk *url, int is_email) chunk_trim(url); + if (url->len == 0) + return NULL; + if (is_email) gh_buf_puts(&buf, "mailto:"); diff --git a/src/print.c b/src/print.c index c262995..832ad4f 100644 --- a/src/print.c +++ b/src/print.c @@ -153,7 +153,10 @@ extern void print_inlines(inl* ils, int indent) case INL_LINK: case INL_IMAGE: printf("%s url=", ils->tag == INL_LINK ? "link" : "image"); - print_str(ils->content.linkable.url, -1); + + if (ils->content.linkable.url) + print_str(ils->content.linkable.url, -1); + if (ils->content.linkable.title) { printf(" title="); print_str(ils->content.linkable.title, -1); -- cgit v1.2.3 From 647b15968c95ec268d6d728eea73756c7ba648a8 Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Thu, 4 Sep 2014 18:42:49 +0200 Subject: Rename inl --- src/html/html.c | 2 +- src/inlines.c | 88 ++++++++++++++++++++++++++++----------------------------- src/print.c | 2 +- src/stmd.h | 34 ++++++++++++++-------- 4 files changed, 68 insertions(+), 58 deletions(-) (limited to 'src/print.c') diff --git a/src/html/html.c b/src/html/html.c index a9356dd..53521b8 100644 --- a/src/html/html.c +++ b/src/html/html.c @@ -141,7 +141,7 @@ void blocks_to_html(strbuf *html, block *b, bool tight) } // Convert an inline list to HTML. Returns 0 on success, and sets result. -void inlines_to_html(strbuf *html, inl* ils) +void inlines_to_html(strbuf *html, struct inl* ils) { strbuf scrap = GH_BUF_INIT; diff --git a/src/inlines.c b/src/inlines.c index 33973df..301125e 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -28,9 +28,9 @@ inline static chunk chunk_literal(const char *data); inline static chunk chunk_buf_detach(strbuf *buf); inline static chunk chunk_dup(const chunk *ch, int pos, int len); -static inl *parse_chunk_inlines(chunk *chunk, reference** refmap); -static inl *parse_inlines_while(subject* subj, int (*f)(subject*)); -static int parse_inline(subject* subj, inl ** last); +static struct inl *parse_chunk_inlines(chunk *chunk, reference** refmap); +static struct inl *parse_inlines_while(subject* subj, int (*f)(subject*)); +static int parse_inline(subject* subj, struct inl ** last); static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap); static void subject_from_buf(subject *e, strbuf *buffer, reference** refmap); @@ -108,9 +108,9 @@ extern void add_reference(reference** refmap, reference* ref) } } -inline static inl* make_link_from_reference(inl* label, reference *ref) +inline static struct inl* make_link_from_reference(struct inl* label, reference *ref) { - inl* e = (inl*) malloc(sizeof(inl)); + struct inl* e = (struct inl*) malloc(sizeof(struct inl)); e->tag = INL_LINK; e->content.linkable.label = label; e->content.linkable.url = strdup(ref->url); @@ -120,9 +120,9 @@ inline static inl* make_link_from_reference(inl* label, reference *ref) } // Create an inline with a linkable string value. -inline static inl* make_link(inl* label, chunk url, chunk title, int is_email) +inline static struct inl* make_link(struct inl* label, chunk url, chunk title, int is_email) { - inl* e = (inl*) malloc(sizeof(inl)); + struct inl* e = (struct inl*) malloc(sizeof(struct inl)); e->tag = INL_LINK; e->content.linkable.label = label; e->content.linkable.url = clean_url(&url, is_email); @@ -131,9 +131,9 @@ inline static inl* make_link(inl* label, chunk url, chunk title, int is_email) return e; } -inline static inl* make_inlines(int t, inl* contents) +inline static struct inl* make_inlines(int t, struct inl* contents) { - inl* e = (inl*) malloc(sizeof(inl)); + struct inl* e = (struct inl*) malloc(sizeof(struct inl)); e->tag = t; e->content.inlines = contents; e->next = NULL; @@ -141,9 +141,9 @@ inline static inl* make_inlines(int t, inl* contents) } // Create an inline with a literal string value. -inline static inl* make_literal(int t, chunk s) +inline static struct inl* make_literal(int t, chunk s) { - inl* e = (inl*) malloc(sizeof(inl)); + struct inl* e = (struct inl*) malloc(sizeof(struct inl)); e->tag = t; e->content.literal = s; e->next = NULL; @@ -151,9 +151,9 @@ inline static inl* make_literal(int t, chunk s) } // Create an inline with no value. -inline static inl* make_simple(int t) +inline static struct inl* make_simple(int t) { - inl* e = (inl*) malloc(sizeof(inl)); + struct inl* e = (struct inl*) malloc(sizeof(struct inl)); e->tag = t; e->next = NULL; return e; @@ -170,9 +170,9 @@ inline static inl* make_simple(int t) #define make_strong(contents) make_inlines(INL_STRONG, contents) // Free an inline list. -extern void free_inlines(inl* e) +extern void free_inlines(struct inl* e) { - inl * next; + struct inl * next; while (e != NULL) { switch (e->tag){ case INL_STRING: @@ -205,12 +205,12 @@ extern void free_inlines(inl* e) // Append inline list b to the end of inline list a. // Return pointer to head of new list. -inline static inl* append_inlines(inl* a, inl* b) +inline static struct inl* append_inlines(struct inl* a, struct inl* b) { if (a == NULL) { // NULL acts like an empty list return b; } - inl* cur = a; + struct inl* cur = a; while (cur->next) { cur = cur->next; } @@ -336,7 +336,7 @@ static void normalize_whitespace(strbuf *s) // Parse backtick code section or raw backticks, return an inline. // Assumes that the subject has a backtick at the current position. -static inl* handle_backticks(subject *subj) +static struct inl* handle_backticks(subject *subj) { chunk openticks = take_while(subj, isbacktick); int startpos = subj->pos; @@ -382,15 +382,15 @@ static int scan_delims(subject* subj, char c, bool * can_open, bool * can_close) // Parse strong/emph or a fallback. // Assumes the subject has '_' or '*' at the current position. -static inl* handle_strong_emph(subject* subj, char c) +static struct inl* handle_strong_emph(subject* subj, char c) { bool can_open, can_close; - inl * result = NULL; - inl ** last = malloc(sizeof(inl *)); - inl * new; - inl * il; - inl * first_head = NULL; - inl * first_close = NULL; + struct inl * result = NULL; + struct inl ** last = malloc(sizeof(struct inl *)); + struct inl * new; + struct inl * il; + struct inl * first_head = NULL; + struct inl * first_close = NULL; int first_close_delims = 0; int numdelims; @@ -508,7 +508,7 @@ done: } // Parse backslash-escape or just a backslash, returning an inline. -static inl* handle_backslash(subject *subj) +static struct inl* handle_backslash(subject *subj) { advance(subj); unsigned char nextchar = peek_char(subj); @@ -525,10 +525,10 @@ static inl* handle_backslash(subject *subj) // Parse an entity or a regular "&" string. // Assumes the subject has an '&' character at the current position. -static inl* handle_entity(subject* subj) +static struct inl* handle_entity(subject* subj) { int match; - inl *result; + struct inl *result; match = scan_entity(&subj->input, subj->pos); if (match) { result = make_entity(chunk_dup(&subj->input, subj->pos, match)); @@ -542,10 +542,10 @@ static inl* handle_entity(subject* subj) // Like make_str, but parses entities. // Returns an inline sequence consisting of str and entity elements. -static inl *make_str_with_entities(chunk *content) +static struct inl *make_str_with_entities(chunk *content) { - inl *result = NULL; - inl *new; + struct inl *result = NULL; + struct inl *new; int searchpos; char c; subject subj; @@ -634,7 +634,7 @@ static unsigned char *clean_title(chunk *title) // Parse an autolink or HTML tag. // Assumes the subject has a '<' character at the current position. -static inl* handle_pointy_brace(subject* subj) +static struct inl* handle_pointy_brace(subject* subj) { int matchlen = 0; chunk contents; @@ -693,7 +693,7 @@ static inl* handle_pointy_brace(subject* subj) static int link_label(subject* subj, chunk *raw_label) { int nestlevel = 0; - inl* tmp = NULL; + struct inl* tmp = NULL; int startpos = subj->pos; if (subj->label_nestlevel) { @@ -751,10 +751,10 @@ static int link_label(subject* subj, chunk *raw_label) } // Parse a link or the link portion of an image, or return a fallback. -static inl* handle_left_bracket(subject* subj) +static struct inl* handle_left_bracket(subject* subj) { - inl *lab = NULL; - inl *result = NULL; + struct inl *lab = NULL; + struct inl *result = NULL; reference *ref; int n; int sps; @@ -838,7 +838,7 @@ static inl* handle_left_bracket(subject* subj) // Parse a hard or soft linebreak, returning an inline. // Assumes the subject has a newline at the current position. -static inl* handle_newline(subject *subj) +static struct inl* handle_newline(subject *subj) { int nlpos = subj->pos; // skip over newline @@ -862,16 +862,16 @@ inline static int not_eof(subject* subj) } // Parse inlines while a predicate is satisfied. Return inlines. -extern inl* parse_inlines_while(subject* subj, int (*f)(subject*)) +extern struct inl* parse_inlines_while(subject* subj, int (*f)(subject*)) { - inl* result = NULL; - inl** last = &result; + struct inl* result = NULL; + struct inl** last = &result; while ((*f)(subj) && parse_inline(subj, last)) { } return result; } -inl *parse_chunk_inlines(chunk *chunk, reference** refmap) +struct inl *parse_chunk_inlines(chunk *chunk, reference** refmap) { subject subj; subject_from_chunk(&subj, chunk, refmap); @@ -894,9 +894,9 @@ static int subject_find_special_char(subject *subj) // Parse an inline, advancing subject, and add it to last element. // Adjust tail to point to new last element of list. // Return 0 if no inline can be parsed, 1 otherwise. -static int parse_inline(subject* subj, inl ** last) +static int parse_inline(subject* subj, struct inl ** last) { - inl* new = NULL; + struct inl* new = NULL; chunk contents; unsigned char c; int endpos; @@ -971,7 +971,7 @@ static int parse_inline(subject* subj, inl ** last) return 1; } -extern inl* parse_inlines(strbuf *input, reference** refmap) +extern struct inl* parse_inlines(strbuf *input, reference** refmap) { subject subj; subject_from_buf(&subj, input, refmap); diff --git a/src/print.c b/src/print.c index 832ad4f..63f63c8 100644 --- a/src/print.c +++ b/src/print.c @@ -117,7 +117,7 @@ extern void print_blocks(block* b, int indent) } // Prettyprint an inline list, for debugging. -extern void print_inlines(inl* ils, int indent) +extern void print_inlines(struct inl* ils, int indent) { while(ils != NULL) { for (int i=0; i < indent; i++) { diff --git a/src/stmd.h b/src/stmd.h index 2e86f3a..9ed33ec 100644 --- a/src/stmd.h +++ b/src/stmd.h @@ -10,20 +10,30 @@ #define VERSION "0.1" #define CODE_INDENT 4 -typedef struct Inline { - enum { INL_STRING, INL_SOFTBREAK, INL_LINEBREAK, INL_CODE, INL_RAW_HTML, INL_ENTITY, - INL_EMPH, INL_STRONG, INL_LINK, INL_IMAGE } tag; +struct inl { + enum { + INL_STRING, + INL_SOFTBREAK, + INL_LINEBREAK, + INL_CODE, + INL_RAW_HTML, + INL_ENTITY, + INL_EMPH, + INL_STRONG, + INL_LINK, + INL_IMAGE + } tag; union { chunk literal; - struct Inline *inlines; + struct inl *inlines; struct { - struct Inline *label; + struct inl *label; unsigned char *url; unsigned char *title; } linkable; } content; - struct Inline *next; -} inl; + struct inl *next; +}; typedef struct Reference { unsigned char *label; @@ -77,7 +87,7 @@ typedef struct Block { struct Block* parent; struct Block* top; strbuf string_content; - inl* inline_content; + struct inl* inline_content; union { struct ListData list_data; struct FencedCodeData fenced_code_data; @@ -88,8 +98,8 @@ typedef struct Block { struct Block * prev; } block; -inl* parse_inlines(strbuf *input, reference** refmap); -void free_inlines(inl* e); +struct inl* parse_inlines(strbuf *input, reference** refmap); +void free_inlines(struct inl* e); int parse_reference(strbuf *input, reference** refmap); void free_reference(reference *ref); @@ -106,11 +116,11 @@ void free_blocks(block* e); extern block *stmd_parse_document(const unsigned char *buffer, size_t len); extern block *stmd_parse_file(FILE *f); -void print_inlines(inl* ils, int indent); +void print_inlines(struct inl* ils, int indent); void print_blocks(block* blk, int indent); void blocks_to_html(strbuf *html, block *b, bool tight); -void inlines_to_html(strbuf *html, inl *b); +void inlines_to_html(strbuf *html, struct inl *b); void utf8proc_case_fold(strbuf *dest, const unsigned char *str, int len); -- cgit v1.2.3 From 9e4855365b920c2a80b0f1ab6937280f0b504334 Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Thu, 4 Sep 2014 18:45:44 +0200 Subject: Rename `inl` --- src/html/html.c | 2 +- src/inlines.c | 88 ++++++++++++++++++++++++++++----------------------------- src/print.c | 2 +- src/stmd.h | 28 ++++++++++-------- 4 files changed, 62 insertions(+), 58 deletions(-) (limited to 'src/print.c') diff --git a/src/html/html.c b/src/html/html.c index 53521b8..a7bb21a 100644 --- a/src/html/html.c +++ b/src/html/html.c @@ -141,7 +141,7 @@ void blocks_to_html(strbuf *html, block *b, bool tight) } // Convert an inline list to HTML. Returns 0 on success, and sets result. -void inlines_to_html(strbuf *html, struct inl* ils) +void inlines_to_html(strbuf *html, node_inl* ils) { strbuf scrap = GH_BUF_INIT; diff --git a/src/inlines.c b/src/inlines.c index 301125e..6bb89da 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -28,9 +28,9 @@ inline static chunk chunk_literal(const char *data); inline static chunk chunk_buf_detach(strbuf *buf); inline static chunk chunk_dup(const chunk *ch, int pos, int len); -static struct inl *parse_chunk_inlines(chunk *chunk, reference** refmap); -static struct inl *parse_inlines_while(subject* subj, int (*f)(subject*)); -static int parse_inline(subject* subj, struct inl ** last); +static node_inl *parse_chunk_inlines(chunk *chunk, reference** refmap); +static node_inl *parse_inlines_while(subject* subj, int (*f)(subject*)); +static int parse_inline(subject* subj, node_inl ** last); static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap); static void subject_from_buf(subject *e, strbuf *buffer, reference** refmap); @@ -108,9 +108,9 @@ extern void add_reference(reference** refmap, reference* ref) } } -inline static struct inl* make_link_from_reference(struct inl* label, reference *ref) +inline static node_inl* make_link_from_reference(node_inl* label, reference *ref) { - struct inl* e = (struct inl*) malloc(sizeof(struct inl)); + node_inl* e = (node_inl*) malloc(sizeof(node_inl)); e->tag = INL_LINK; e->content.linkable.label = label; e->content.linkable.url = strdup(ref->url); @@ -120,9 +120,9 @@ inline static struct inl* make_link_from_reference(struct inl* label, reference } // Create an inline with a linkable string value. -inline static struct inl* make_link(struct inl* label, chunk url, chunk title, int is_email) +inline static node_inl* make_link(node_inl* label, chunk url, chunk title, int is_email) { - struct inl* e = (struct inl*) malloc(sizeof(struct inl)); + node_inl* e = (node_inl*) malloc(sizeof(node_inl)); e->tag = INL_LINK; e->content.linkable.label = label; e->content.linkable.url = clean_url(&url, is_email); @@ -131,9 +131,9 @@ inline static struct inl* make_link(struct inl* label, chunk url, chunk title, i return e; } -inline static struct inl* make_inlines(int t, struct inl* contents) +inline static node_inl* make_inlines(int t, node_inl* contents) { - struct inl* e = (struct inl*) malloc(sizeof(struct inl)); + node_inl* e = (node_inl*) malloc(sizeof(node_inl)); e->tag = t; e->content.inlines = contents; e->next = NULL; @@ -141,9 +141,9 @@ inline static struct inl* make_inlines(int t, struct inl* contents) } // Create an inline with a literal string value. -inline static struct inl* make_literal(int t, chunk s) +inline static node_inl* make_literal(int t, chunk s) { - struct inl* e = (struct inl*) malloc(sizeof(struct inl)); + node_inl* e = (node_inl*) malloc(sizeof(node_inl)); e->tag = t; e->content.literal = s; e->next = NULL; @@ -151,9 +151,9 @@ inline static struct inl* make_literal(int t, chunk s) } // Create an inline with no value. -inline static struct inl* make_simple(int t) +inline static node_inl* make_simple(int t) { - struct inl* e = (struct inl*) malloc(sizeof(struct inl)); + node_inl* e = (node_inl*) malloc(sizeof(node_inl)); e->tag = t; e->next = NULL; return e; @@ -170,9 +170,9 @@ inline static struct inl* make_simple(int t) #define make_strong(contents) make_inlines(INL_STRONG, contents) // Free an inline list. -extern void free_inlines(struct inl* e) +extern void free_inlines(node_inl* e) { - struct inl * next; + node_inl * next; while (e != NULL) { switch (e->tag){ case INL_STRING: @@ -205,12 +205,12 @@ extern void free_inlines(struct inl* e) // Append inline list b to the end of inline list a. // Return pointer to head of new list. -inline static struct inl* append_inlines(struct inl* a, struct inl* b) +inline static node_inl* append_inlines(node_inl* a, node_inl* b) { if (a == NULL) { // NULL acts like an empty list return b; } - struct inl* cur = a; + node_inl* cur = a; while (cur->next) { cur = cur->next; } @@ -336,7 +336,7 @@ static void normalize_whitespace(strbuf *s) // Parse backtick code section or raw backticks, return an inline. // Assumes that the subject has a backtick at the current position. -static struct inl* handle_backticks(subject *subj) +static node_inl* handle_backticks(subject *subj) { chunk openticks = take_while(subj, isbacktick); int startpos = subj->pos; @@ -382,15 +382,15 @@ static int scan_delims(subject* subj, char c, bool * can_open, bool * can_close) // Parse strong/emph or a fallback. // Assumes the subject has '_' or '*' at the current position. -static struct inl* handle_strong_emph(subject* subj, char c) +static node_inl* handle_strong_emph(subject* subj, char c) { bool can_open, can_close; - struct inl * result = NULL; - struct inl ** last = malloc(sizeof(struct inl *)); - struct inl * new; - struct inl * il; - struct inl * first_head = NULL; - struct inl * first_close = NULL; + node_inl * result = NULL; + node_inl ** last = malloc(sizeof(node_inl *)); + node_inl * new; + node_inl * il; + node_inl * first_head = NULL; + node_inl * first_close = NULL; int first_close_delims = 0; int numdelims; @@ -508,7 +508,7 @@ done: } // Parse backslash-escape or just a backslash, returning an inline. -static struct inl* handle_backslash(subject *subj) +static node_inl* handle_backslash(subject *subj) { advance(subj); unsigned char nextchar = peek_char(subj); @@ -525,10 +525,10 @@ static struct inl* handle_backslash(subject *subj) // Parse an entity or a regular "&" string. // Assumes the subject has an '&' character at the current position. -static struct inl* handle_entity(subject* subj) +static node_inl* handle_entity(subject* subj) { int match; - struct inl *result; + node_inl *result; match = scan_entity(&subj->input, subj->pos); if (match) { result = make_entity(chunk_dup(&subj->input, subj->pos, match)); @@ -542,10 +542,10 @@ static struct inl* handle_entity(subject* subj) // Like make_str, but parses entities. // Returns an inline sequence consisting of str and entity elements. -static struct inl *make_str_with_entities(chunk *content) +static node_inl *make_str_with_entities(chunk *content) { - struct inl *result = NULL; - struct inl *new; + node_inl *result = NULL; + node_inl *new; int searchpos; char c; subject subj; @@ -634,7 +634,7 @@ static unsigned char *clean_title(chunk *title) // Parse an autolink or HTML tag. // Assumes the subject has a '<' character at the current position. -static struct inl* handle_pointy_brace(subject* subj) +static node_inl* handle_pointy_brace(subject* subj) { int matchlen = 0; chunk contents; @@ -693,7 +693,7 @@ static struct inl* handle_pointy_brace(subject* subj) static int link_label(subject* subj, chunk *raw_label) { int nestlevel = 0; - struct inl* tmp = NULL; + node_inl* tmp = NULL; int startpos = subj->pos; if (subj->label_nestlevel) { @@ -751,10 +751,10 @@ static int link_label(subject* subj, chunk *raw_label) } // Parse a link or the link portion of an image, or return a fallback. -static struct inl* handle_left_bracket(subject* subj) +static node_inl* handle_left_bracket(subject* subj) { - struct inl *lab = NULL; - struct inl *result = NULL; + node_inl *lab = NULL; + node_inl *result = NULL; reference *ref; int n; int sps; @@ -838,7 +838,7 @@ static struct inl* handle_left_bracket(subject* subj) // Parse a hard or soft linebreak, returning an inline. // Assumes the subject has a newline at the current position. -static struct inl* handle_newline(subject *subj) +static node_inl* handle_newline(subject *subj) { int nlpos = subj->pos; // skip over newline @@ -862,16 +862,16 @@ inline static int not_eof(subject* subj) } // Parse inlines while a predicate is satisfied. Return inlines. -extern struct inl* parse_inlines_while(subject* subj, int (*f)(subject*)) +extern node_inl* parse_inlines_while(subject* subj, int (*f)(subject*)) { - struct inl* result = NULL; - struct inl** last = &result; + node_inl* result = NULL; + node_inl** last = &result; while ((*f)(subj) && parse_inline(subj, last)) { } return result; } -struct inl *parse_chunk_inlines(chunk *chunk, reference** refmap) +node_inl *parse_chunk_inlines(chunk *chunk, reference** refmap) { subject subj; subject_from_chunk(&subj, chunk, refmap); @@ -894,9 +894,9 @@ static int subject_find_special_char(subject *subj) // Parse an inline, advancing subject, and add it to last element. // Adjust tail to point to new last element of list. // Return 0 if no inline can be parsed, 1 otherwise. -static int parse_inline(subject* subj, struct inl ** last) +static int parse_inline(subject* subj, node_inl ** last) { - struct inl* new = NULL; + node_inl* new = NULL; chunk contents; unsigned char c; int endpos; @@ -971,7 +971,7 @@ static int parse_inline(subject* subj, struct inl ** last) return 1; } -extern struct inl* parse_inlines(strbuf *input, reference** refmap) +extern node_inl* parse_inlines(strbuf *input, reference** refmap) { subject subj; subject_from_buf(&subj, input, refmap); diff --git a/src/print.c b/src/print.c index 63f63c8..01e9136 100644 --- a/src/print.c +++ b/src/print.c @@ -117,7 +117,7 @@ extern void print_blocks(block* b, int indent) } // Prettyprint an inline list, for debugging. -extern void print_inlines(struct inl* ils, int indent) +extern void print_inlines(node_inl* ils, int indent) { while(ils != NULL) { for (int i=0; i < indent; i++) { diff --git a/src/stmd.h b/src/stmd.h index 9ed33ec..dbc8c8c 100644 --- a/src/stmd.h +++ b/src/stmd.h @@ -10,7 +10,7 @@ #define VERSION "0.1" #define CODE_INDENT 4 -struct inl { +struct node_inl { enum { INL_STRING, INL_SOFTBREAK, @@ -25,22 +25,26 @@ struct inl { } tag; union { chunk literal; - struct inl *inlines; + struct node_inl *inlines; struct { - struct inl *label; + struct node_inl *label; unsigned char *url; unsigned char *title; } linkable; } content; - struct inl *next; + struct node_inl *next; }; -typedef struct Reference { +typedef struct node_inl node_inl; + +struct reference { unsigned char *label; unsigned char *url; unsigned char *title; - UT_hash_handle hh; // used by uthash -} reference; + UT_hash_handle hh; // used by uthash +}; + +typedef struct reference reference; // Types for blocks @@ -87,7 +91,7 @@ typedef struct Block { struct Block* parent; struct Block* top; strbuf string_content; - struct inl* inline_content; + node_inl* inline_content; union { struct ListData list_data; struct FencedCodeData fenced_code_data; @@ -98,8 +102,8 @@ typedef struct Block { struct Block * prev; } block; -struct inl* parse_inlines(strbuf *input, reference** refmap); -void free_inlines(struct inl* e); +node_inl* parse_inlines(strbuf *input, reference** refmap); +void free_inlines(node_inl* e); int parse_reference(strbuf *input, reference** refmap); void free_reference(reference *ref); @@ -116,11 +120,11 @@ void free_blocks(block* e); extern block *stmd_parse_document(const unsigned char *buffer, size_t len); extern block *stmd_parse_file(FILE *f); -void print_inlines(struct inl* ils, int indent); +void print_inlines(node_inl* ils, int indent); void print_blocks(block* blk, int indent); void blocks_to_html(strbuf *html, block *b, bool tight); -void inlines_to_html(strbuf *html, struct inl *b); +void inlines_to_html(strbuf *html, node_inl *b); void utf8proc_case_fold(strbuf *dest, const unsigned char *str, int len); -- cgit v1.2.3 From 19ba82d7a30bd999a25fc303a8516056880abc9d Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Thu, 4 Sep 2014 18:49:33 +0200 Subject: Rename node_block --- src/blocks.c | 100 ++++++++++++++++++++++++++++---------------------------- src/html/html.c | 6 ++-- src/main.c | 4 +-- src/print.c | 4 +-- src/stmd.h | 57 +++++++++++++++++--------------- 5 files changed, 87 insertions(+), 84 deletions(-) (limited to 'src/print.c') diff --git a/src/blocks.c b/src/blocks.c index 9faccd9..d74ceb2 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -10,13 +10,13 @@ #define peek_at(i, n) (i)->data[n] -static void incorporate_line(strbuf *ln, int line_number, block** curptr); -static void finalize(block* b, int line_number); +static void incorporate_line(strbuf *ln, int line_number, node_block** curptr); +static void finalize(node_block* b, int line_number); -static block* make_block(int tag, int start_line, int start_column) +static node_block* make_block(int tag, int start_line, int start_column) { - block* e; - e = (block*) malloc(sizeof(block)); + node_block* e; + e = (node_block*) malloc(sizeof(node_block)); e->tag = tag; e->open = true; e->last_line_blank = false; @@ -35,10 +35,10 @@ static block* make_block(int tag, int start_line, int start_column) return e; } -// Create a root document block. -extern block* make_document() +// Create a root document node_block. +extern node_block* make_document() { - block * e = make_block(document, 1, 1); + node_block * e = make_block(document, 1, 1); reference * map = NULL; reference ** refmap; refmap = (reference**) malloc(sizeof(reference*)); @@ -82,10 +82,10 @@ static inline bool accepts_lines(int block_type) block_type == fenced_code); } -static void add_line(block* block, chunk *ch, int offset) +static void add_line(node_block* node_block, chunk *ch, int offset) { - assert(block->open); - strbuf_put(&block->string_content, ch->data + offset, ch->len - offset); + assert(node_block->open); + strbuf_put(&node_block->string_content, ch->data + offset, ch->len - offset); } static void remove_trailing_blank_lines(strbuf *ln) @@ -109,25 +109,25 @@ static void remove_trailing_blank_lines(strbuf *ln) strbuf_truncate(ln, i); } -// Check to see if a block ends with a blank line, descending +// Check to see if a node_block ends with a blank line, descending // if needed into lists and sublists. -static bool ends_with_blank_line(block* block) +static bool ends_with_blank_line(node_block* node_block) { - if (block->last_line_blank) { + if (node_block->last_line_blank) { return true; } - if ((block->tag == list || block->tag == list_item) && block->last_child) { - return ends_with_blank_line(block->last_child); + if ((node_block->tag == list || node_block->tag == list_item) && node_block->last_child) { + return ends_with_blank_line(node_block->last_child); } else { return false; } } // Break out of all containing lists -static int break_out_of_lists(block ** bptr, int line_number) +static int break_out_of_lists(node_block ** bptr, int line_number) { - block * container = *bptr; - block * b = container->top; + node_block * container = *bptr; + node_block * b = container->top; // find first containing list: while (b && b->tag != list) { b = b->last_child; @@ -144,15 +144,15 @@ static int break_out_of_lists(block ** bptr, int line_number) } -static void finalize(block* b, int line_number) +static void finalize(node_block* b, int line_number) { int firstlinelen; int pos; - block* item; - block* subitem; + node_block* item; + node_block* subitem; if (!b->open) - return; // don't do anything if the block is already closed + return; // don't do anything if the node_block is already closed b->open = false; if (line_number > b->start_line) { @@ -230,20 +230,20 @@ static void finalize(block* b, int line_number) } } -// Add a block as child of another. Return pointer to child. -extern block* add_child(block* parent, +// Add a node_block as child of another. Return pointer to child. +extern node_block* add_child(node_block* parent, int block_type, int start_line, int start_column) { assert(parent); - // if 'parent' isn't the kind of block that can accept this child, - // then back up til we hit a block that can. + // if 'parent' isn't the kind of node_block that can accept this child, + // then back up til we hit a node_block that can. while (!can_contain(parent->tag, block_type)) { finalize(parent, start_line); parent = parent->parent; } - block* child = make_block(block_type, start_line, start_column); + node_block* child = make_block(block_type, start_line, start_column); child->parent = parent; child->top = parent->top; @@ -258,10 +258,10 @@ extern block* add_child(block* parent, return child; } -// Free a block list and any children. -extern void free_blocks(block* e) +// Free a node_block list and any children. +extern void free_blocks(node_block* e) { - block * next; + node_block * next; while (e != NULL) { next = e->next; free_inlines(e->inline_content); @@ -277,9 +277,9 @@ extern void free_blocks(block* e) } } -// Walk through block and all children, recursively, parsing +// Walk through node_block and all children, recursively, parsing // string content into inline content where appropriate. -void process_inlines(block* cur, reference** refmap) +void process_inlines(node_block* cur, reference** refmap) { switch (cur->tag) { case paragraph: @@ -294,7 +294,7 @@ void process_inlines(block* cur, reference** refmap) break; } - block *child = cur->children; + node_block *child = cur->children; while (child != NULL) { process_inlines(child, refmap); child = child->next; @@ -394,7 +394,7 @@ static void expand_tabs(strbuf *ob, const unsigned char *line, size_t size) } } -static block *finalize_document(block *document, int linenum) +static node_block *finalize_document(node_block *document, int linenum) { while (document != document->top) { finalize(document, linenum); @@ -407,12 +407,12 @@ static block *finalize_document(block *document, int linenum) return document; } -extern block *stmd_parse_file(FILE *f) +extern node_block *stmd_parse_file(FILE *f) { strbuf line = GH_BUF_INIT; unsigned char buffer[4096]; int linenum = 1; - block *document = make_document(); + node_block *document = make_document(); while (fgets((char *)buffer, sizeof(buffer), f)) { expand_tabs(&line, buffer, strlen((char *)buffer)); @@ -425,12 +425,12 @@ extern block *stmd_parse_file(FILE *f) return finalize_document(document, linenum); } -extern block *stmd_parse_document(const unsigned char *buffer, size_t len) +extern node_block *stmd_parse_document(const unsigned char *buffer, size_t len) { strbuf line = GH_BUF_INIT; int linenum = 1; const unsigned char *end = buffer + len; - block *document = make_document(); + node_block *document = make_document(); while (buffer < end) { const unsigned char *eol = memchr(buffer, '\n', end - buffer); @@ -470,18 +470,18 @@ static void chop_trailing_hashtags(chunk *ch) ch->len = n + 1; } -// Process one line at a time, modifying a block. -static void incorporate_line(strbuf *line, int line_number, block** curptr) +// Process one line at a time, modifying a node_block. +static void incorporate_line(strbuf *line, int line_number, node_block** curptr) { - block* last_matched_container; + node_block* last_matched_container; int offset = 0; int matched = 0; int lev = 0; int i; struct ListData * data = NULL; bool all_matched = true; - block* container; - block* cur = *curptr; + node_block* container; + node_block* cur = *curptr; bool blank = false; int first_nonspace; int indent; @@ -493,8 +493,8 @@ static void incorporate_line(strbuf *line, int line_number, block** curptr) // container starts at the document root. container = cur->top; - // for each containing block, try to parse the associated line start. - // bail out on failure: container will point to the last matching block. + // for each containing node_block, try to parse the associated line start. + // bail out on failure: container will point to the last matching node_block. while (container->last_child && container->last_child->open) { container = container->last_child; @@ -570,7 +570,7 @@ static void incorporate_line(strbuf *line, int line_number, block** curptr) } if (!all_matched) { - container = container->parent; // back up to last matching block + container = container->parent; // back up to last matching node_block break; } } @@ -582,7 +582,7 @@ static void incorporate_line(strbuf *line, int line_number, block** curptr) break_out_of_lists(&container, line_number); } - // unless last matched container is code block, try new container starts: + // unless last matched container is code node_block, try new container starts: while (container->tag != fenced_code && container->tag != indented_code && container->tag != html_block) { @@ -713,7 +713,7 @@ static void incorporate_line(strbuf *line, int line_number, block** curptr) indent = first_nonspace - offset; blank = peek_at(&input, first_nonspace) == '\n'; - // block quote lines are never blank as they start with > + // node_block quote lines are never blank as they start with > // and we don't count blanks in fenced code for purposes of tight/loose // lists or breaking out of lists. we also don't set last_line_blank // on an empty list item. @@ -724,7 +724,7 @@ static void incorporate_line(strbuf *line, int line_number, block** curptr) container->children == NULL && container->start_line == line_number)); - block *cont = container; + node_block *cont = container; while (cont->parent) { cont->parent->last_line_blank = false; cont = cont->parent; diff --git a/src/html/html.c b/src/html/html.c index a7bb21a..6041fde 100644 --- a/src/html/html.c +++ b/src/html/html.c @@ -8,7 +8,7 @@ #include "debug.h" #include "html/houdini.h" -// Functions to convert block and inline lists to HTML strings. +// Functions to convert node_block and inline lists to HTML strings. static void escape_html(strbuf *dest, const unsigned char *source, int length) { @@ -32,8 +32,8 @@ static inline void cr(strbuf *html) strbuf_putc(html, '\n'); } -// Convert a block list to HTML. Returns 0 on success, and sets result. -void blocks_to_html(strbuf *html, block *b, bool tight) +// Convert a node_block list to HTML. Returns 0 on success, and sets result. +void blocks_to_html(strbuf *html, node_block *b, bool tight) { struct ListData *data; diff --git a/src/main.c b/src/main.c index 7cf67e2..90bb16d 100644 --- a/src/main.c +++ b/src/main.c @@ -12,7 +12,7 @@ void print_usage() printf(" --version Print version\n"); } -static void print_document(block *document, bool ast) +static void print_document(node_block *document, bool ast) { strbuf html = GH_BUF_INIT; @@ -30,7 +30,7 @@ int main(int argc, char *argv[]) int i, numfps = 0; bool ast = false; int files[argc]; - block *document = NULL; + node_block *document = NULL; for (i = 1; i < argc; i++) { if (strcmp(argv[i], "--version") == 0) { diff --git a/src/print.c b/src/print.c index 01e9136..069d299 100644 --- a/src/print.c +++ b/src/print.c @@ -32,9 +32,9 @@ static void print_str(const unsigned char *s, int len) putchar('"'); } -// Functions to pretty-print inline and block lists, for debugging. +// Functions to pretty-print inline and node_block lists, for debugging. // Prettyprint an inline list, for debugging. -extern void print_blocks(block* b, int indent) +extern void print_blocks(node_block* b, int indent) { struct ListData *data; diff --git a/src/stmd.h b/src/stmd.h index dbc8c8c..957ab03 100644 --- a/src/stmd.h +++ b/src/stmd.h @@ -67,29 +67,30 @@ struct FencedCodeData { strbuf info; }; -typedef struct Block { - enum { document, - block_quote, - list, - list_item, - fenced_code, - indented_code, - html_block, - paragraph, - atx_header, - setext_header, - hrule, - reference_def +struct node_block { + enum { + document, + block_quote, + list, + list_item, + fenced_code, + indented_code, + html_block, + paragraph, + atx_header, + setext_header, + hrule, + reference_def } tag; int start_line; int start_column; int end_line; bool open; bool last_line_blank; - struct Block* children; - struct Block* last_child; - struct Block* parent; - struct Block* top; + struct node_block* children; + struct node_block* last_child; + struct node_block* parent; + struct node_block* top; strbuf string_content; node_inl* inline_content; union { @@ -98,9 +99,11 @@ typedef struct Block { int header_level; reference** refmap; } attributes; - struct Block * next; - struct Block * prev; -} block; + struct node_block * next; + struct node_block * prev; +}; + +typedef struct node_block node_block; node_inl* parse_inlines(strbuf *input, reference** refmap); void free_inlines(node_inl* e); @@ -112,18 +115,18 @@ void free_reference_map(reference **refmap); void add_reference(reference** refmap, reference* ref); void unescape_buffer(strbuf *buf); -extern block* make_document(); -extern block* add_child(block* parent, +extern node_block* make_document(); +extern node_block* add_child(node_block* parent, int block_type, int start_line, int start_column); -void free_blocks(block* e); +void free_blocks(node_block* e); -extern block *stmd_parse_document(const unsigned char *buffer, size_t len); -extern block *stmd_parse_file(FILE *f); +extern node_block *stmd_parse_document(const unsigned char *buffer, size_t len); +extern node_block *stmd_parse_file(FILE *f); void print_inlines(node_inl* ils, int indent); -void print_blocks(block* blk, int indent); +void print_blocks(node_block* blk, int indent); -void blocks_to_html(strbuf *html, block *b, bool tight); +void blocks_to_html(strbuf *html, node_block *b, bool tight); void inlines_to_html(strbuf *html, node_inl *b); void utf8proc_case_fold(strbuf *dest, const unsigned char *str, int len); -- cgit v1.2.3 From 806ff17755c90579afc68914b251b80e2f8c4b77 Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Thu, 4 Sep 2014 18:56:52 +0200 Subject: Rename block literals --- src/blocks.c | 110 ++++++++++++++++++++++++++++---------------------------- src/html/html.c | 26 +++++++------- src/print.c | 24 ++++++------- src/stmd.h | 43 +++++++++++----------- 4 files changed, 100 insertions(+), 103 deletions(-) (limited to 'src/print.c') diff --git a/src/blocks.c b/src/blocks.c index d74ceb2..f671b5e 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -38,7 +38,7 @@ static node_block* make_block(int tag, int start_line, int start_column) // Create a root document node_block. extern node_block* make_document() { - node_block * e = make_block(document, 1, 1); + node_block * e = make_block(BLOCK_DOCUMENT, 1, 1); reference * map = NULL; reference ** refmap; refmap = (reference**) malloc(sizeof(reference*)); @@ -68,18 +68,18 @@ bool is_blank(strbuf *s, int offset) static inline bool can_contain(int parent_type, int child_type) { - return ( parent_type == document || - parent_type == block_quote || - parent_type == list_item || - (parent_type == list && child_type == list_item) ); + return ( parent_type == BLOCK_DOCUMENT || + parent_type == BLOCK_BQUOTE || + parent_type == BLOCK_LIST_ITEM || + (parent_type == BLOCK_LIST && child_type == BLOCK_LIST_ITEM) ); } static inline bool accepts_lines(int block_type) { - return (block_type == paragraph || - block_type == atx_header || - block_type == indented_code || - block_type == fenced_code); + return (block_type == BLOCK_PARAGRAPH || + block_type == BLOCK_ATX_HEADER || + block_type == BLOCK_INDENTED_CODE || + block_type == BLOCK_FENCED_CODE); } static void add_line(node_block* node_block, chunk *ch, int offset) @@ -116,7 +116,7 @@ static bool ends_with_blank_line(node_block* node_block) if (node_block->last_line_blank) { return true; } - if ((node_block->tag == list || node_block->tag == list_item) && node_block->last_child) { + if ((node_block->tag == BLOCK_LIST || node_block->tag == BLOCK_LIST_ITEM) && node_block->last_child) { return ends_with_blank_line(node_block->last_child); } else { return false; @@ -128,8 +128,8 @@ static int break_out_of_lists(node_block ** bptr, int line_number) { node_block * container = *bptr; node_block * b = container->top; - // find first containing list: - while (b && b->tag != list) { + // find first containing BLOCK_LIST: + while (b && b->tag != BLOCK_LIST) { b = b->last_child; } if (b) { @@ -162,7 +162,7 @@ static void finalize(node_block* b, int line_number) } switch (b->tag) { - case paragraph: + case BLOCK_PARAGRAPH: pos = 0; while (strbuf_at(&b->string_content, 0) == '[' && (pos = parse_reference(&b->string_content, b->top->attributes.refmap))) { @@ -170,16 +170,16 @@ static void finalize(node_block* b, int line_number) strbuf_drop(&b->string_content, pos); } if (is_blank(&b->string_content, 0)) { - b->tag = reference_def; + b->tag = BLOCK_REFERENCE_DEF; } break; - case indented_code: + case BLOCK_INDENTED_CODE: remove_trailing_blank_lines(&b->string_content); strbuf_putc(&b->string_content, '\n'); break; - case fenced_code: + case BLOCK_FENCED_CODE: // first line of contents becomes info firstlinelen = strbuf_strchr(&b->string_content, '\n', 0); @@ -196,7 +196,7 @@ static void finalize(node_block* b, int line_number) unescape_buffer(&b->attributes.fenced_code_data.info); break; - case list: // determine tight/loose status + case BLOCK_LIST: // determine tight/loose status b->attributes.list_data.tight = true; // tight by default item = b->children; @@ -266,9 +266,9 @@ extern void free_blocks(node_block* e) next = e->next; free_inlines(e->inline_content); strbuf_free(&e->string_content); - if (e->tag == fenced_code) { + if (e->tag == BLOCK_FENCED_CODE) { strbuf_free(&e->attributes.fenced_code_data.info); - } else if (e->tag == document) { + } else if (e->tag == BLOCK_DOCUMENT) { free_reference_map(e->attributes.refmap); } free_blocks(e->children); @@ -282,9 +282,9 @@ extern void free_blocks(node_block* e) void process_inlines(node_block* cur, reference** refmap) { switch (cur->tag) { - case paragraph: - case atx_header: - case setext_header: + case BLOCK_PARAGRAPH: + case BLOCK_ATX_HEADER: + case BLOCK_SETEXT_HEADER: cur->inline_content = parse_inlines(&cur->string_content, refmap); // MEM // strbuf_free(&cur->string_content); @@ -507,7 +507,7 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr) indent = first_nonspace - offset; blank = peek_at(&input, first_nonspace) == '\n'; - if (container->tag == block_quote) { + if (container->tag == BLOCK_BQUOTE) { matched = indent <= 3 && peek_at(&input, first_nonspace) == '>'; if (matched) { offset = first_nonspace + 1; @@ -517,7 +517,7 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr) all_matched = false; } - } else if (container->tag == list_item) { + } else if (container->tag == BLOCK_LIST_ITEM) { if (indent >= container->attributes.list_data.marker_offset + container->attributes.list_data.padding) { @@ -529,7 +529,7 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr) all_matched = false; } - } else if (container->tag == indented_code) { + } else if (container->tag == BLOCK_INDENTED_CODE) { if (indent >= CODE_INDENT) { offset += CODE_INDENT; @@ -539,13 +539,13 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr) all_matched = false; } - } else if (container->tag == atx_header || - container->tag == setext_header) { + } else if (container->tag == BLOCK_ATX_HEADER || + container->tag == BLOCK_SETEXT_HEADER) { // a header can never contain more than one line all_matched = false; - } else if (container->tag == fenced_code) { + } else if (container->tag == BLOCK_FENCED_CODE) { // skip optional spaces of fence offset i = container->attributes.fenced_code_data.fence_offset; @@ -554,13 +554,13 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr) i--; } - } else if (container->tag == html_block) { + } else if (container->tag == BLOCK_HTML) { if (blank) { all_matched = false; } - } else if (container->tag == paragraph) { + } else if (container->tag == BLOCK_PARAGRAPH) { if (blank) { container->last_line_blank = true; @@ -583,8 +583,8 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr) } // unless last matched container is code node_block, try new container starts: - while (container->tag != fenced_code && container->tag != indented_code && - container->tag != html_block) { + while (container->tag != BLOCK_FENCED_CODE && container->tag != BLOCK_INDENTED_CODE && + container->tag != BLOCK_HTML) { first_nonspace = offset; while (peek_at(&input, first_nonspace) == ' ') @@ -594,9 +594,9 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr) blank = peek_at(&input, first_nonspace) == '\n'; if (indent >= CODE_INDENT) { - if (cur->tag != paragraph && !blank) { + if (cur->tag != BLOCK_PARAGRAPH && !blank) { offset += CODE_INDENT; - container = add_child(container, indented_code, line_number, offset + 1); + container = add_child(container, BLOCK_INDENTED_CODE, line_number, offset + 1); } else { // indent > 4 in lazy line break; } @@ -607,12 +607,12 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr) // optional following character if (peek_at(&input, offset) == ' ') offset++; - container = add_child(container, block_quote, line_number, offset + 1); + container = add_child(container, BLOCK_BQUOTE, line_number, offset + 1); } else if ((matched = scan_atx_header_start(&input, first_nonspace))) { offset = first_nonspace + matched; - container = add_child(container, atx_header, line_number, offset + 1); + container = add_child(container, BLOCK_ATX_HEADER, line_number, offset + 1); int hashpos = chunk_strchr(&input, '#', first_nonspace); int level = 0; @@ -625,7 +625,7 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr) } else if ((matched = scan_open_code_fence(&input, first_nonspace))) { - container = add_child(container, fenced_code, line_number, first_nonspace + 1); + container = add_child(container, BLOCK_FENCED_CODE, line_number, first_nonspace + 1); container->attributes.fenced_code_data.fence_char = peek_at(&input, first_nonspace); container->attributes.fenced_code_data.fence_length = matched; container->attributes.fenced_code_data.fence_offset = first_nonspace - offset; @@ -633,24 +633,24 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr) } else if ((matched = scan_html_block_tag(&input, first_nonspace))) { - container = add_child(container, html_block, line_number, first_nonspace + 1); + container = add_child(container, BLOCK_HTML, line_number, first_nonspace + 1); // note, we don't adjust offset because the tag is part of the text - } else if (container->tag == paragraph && + } else if (container->tag == BLOCK_PARAGRAPH && (lev = scan_setext_header_line(&input, first_nonspace)) && // check that there is only one line in the paragraph: strbuf_strrchr(&container->string_content, '\n', strbuf_len(&container->string_content) - 2) < 0) { - container->tag = setext_header; + container->tag = BLOCK_SETEXT_HEADER; container->attributes.header_level = lev; offset = input.len - 1; - } else if (!(container->tag == paragraph && !all_matched) && + } else if (!(container->tag == BLOCK_PARAGRAPH && !all_matched) && (matched = scan_hrule(&input, first_nonspace))) { // it's only now that we know the line is not part of a setext header: - container = add_child(container, hrule, line_number, first_nonspace + 1); + container = add_child(container, BLOCK_HRULE, line_number, first_nonspace + 1); finalize(container, line_number); container = container->parent; offset = input.len - 1; @@ -679,15 +679,15 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr) data->marker_offset = indent; - if (container->tag != list || + if (container->tag != BLOCK_LIST || !lists_match(container->attributes.list_data, *data)) { - container = add_child(container, list, line_number, + container = add_child(container, BLOCK_LIST, line_number, first_nonspace + 1); container->attributes.list_data = *data; } // add the list item - container = add_child(container, list_item, line_number, + container = add_child(container, BLOCK_LIST_ITEM, line_number, first_nonspace + 1); /* TODO: static */ container->attributes.list_data = *data; @@ -718,9 +718,9 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr) // lists or breaking out of lists. we also don't set last_line_blank // on an empty list item. container->last_line_blank = (blank && - container->tag != block_quote && - container->tag != fenced_code && - !(container->tag == list_item && + container->tag != BLOCK_BQUOTE && + container->tag != BLOCK_FENCED_CODE && + !(container->tag == BLOCK_LIST_ITEM && container->children == NULL && container->start_line == line_number)); @@ -733,7 +733,7 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr) if (cur != last_matched_container && container == last_matched_container && !blank && - cur->tag == paragraph && + cur->tag == BLOCK_PARAGRAPH && strbuf_len(&cur->string_content) > 0) { add_line(cur, &input, offset); @@ -747,11 +747,11 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr) assert(cur != NULL); } - if (container->tag == indented_code) { + if (container->tag == BLOCK_INDENTED_CODE) { add_line(container, &input, offset); - } else if (container->tag == fenced_code) { + } else if (container->tag == BLOCK_FENCED_CODE) { matched = 0; if (indent <= 3 && @@ -769,7 +769,7 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr) add_line(container, &input, offset); } - } else if (container->tag == html_block) { + } else if (container->tag == BLOCK_HTML) { add_line(container, &input, offset); @@ -777,7 +777,7 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr) // ??? do nothing - } else if (container->tag == atx_header) { + } else if (container->tag == BLOCK_ATX_HEADER) { chop_trailing_hashtags(&input); add_line(container, &input, first_nonspace); @@ -788,10 +788,10 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr) add_line(container, &input, first_nonspace); - } else if (container->tag != hrule && container->tag != setext_header) { + } else if (container->tag != BLOCK_HRULE && container->tag != BLOCK_SETEXT_HEADER) { // create paragraph container for line - container = add_child(container, paragraph, line_number, first_nonspace + 1); + container = add_child(container, BLOCK_PARAGRAPH, line_number, first_nonspace + 1); add_line(container, &input, first_nonspace); } else { diff --git a/src/html/html.c b/src/html/html.c index 6041fde..758ec80 100644 --- a/src/html/html.c +++ b/src/html/html.c @@ -39,11 +39,11 @@ void blocks_to_html(strbuf *html, node_block *b, bool tight) while(b != NULL) { switch(b->tag) { - case document: + case BLOCK_DOCUMENT: blocks_to_html(html, b->children, false); break; - case paragraph: + case BLOCK_PARAGRAPH: if (tight) { inlines_to_html(html, b->inline_content); } else { @@ -54,14 +54,14 @@ void blocks_to_html(strbuf *html, node_block *b, bool tight) } break; - case block_quote: + case BLOCK_BQUOTE: cr(html); strbuf_puts(html, "
    \n"); blocks_to_html(html, b->children, false); strbuf_puts(html, "
    \n"); break; - case list_item: + case BLOCK_LIST_ITEM: cr(html); strbuf_puts(html, "
  • "); blocks_to_html(html, b->children, tight); @@ -69,7 +69,7 @@ void blocks_to_html(strbuf *html, node_block *b, bool tight) strbuf_puts(html, "
  • \n"); break; - case list: + case BLOCK_LIST: // make sure a list starts at the beginning of the line: cr(html); data = &(b->attributes.list_data); @@ -87,21 +87,21 @@ void blocks_to_html(strbuf *html, node_block *b, bool tight) strbuf_putc(html, '\n'); break; - case atx_header: - case setext_header: + case BLOCK_ATX_HEADER: + case BLOCK_SETEXT_HEADER: cr(html); strbuf_printf(html, "", b->attributes.header_level); inlines_to_html(html, b->inline_content); strbuf_printf(html, "\n", b->attributes.header_level); break; - case indented_code: - case fenced_code: + case BLOCK_INDENTED_CODE: + case BLOCK_FENCED_CODE: cr(html); strbuf_puts(html, "tag == fenced_code) { + if (b->tag == BLOCK_FENCED_CODE) { strbuf *info = &b->attributes.fenced_code_data.info; if (strbuf_len(info) > 0) { @@ -121,15 +121,15 @@ void blocks_to_html(strbuf *html, node_block *b, bool tight) strbuf_puts(html, "\n"); break; - case html_block: + case BLOCK_HTML: strbuf_put(html, b->string_content.ptr, b->string_content.size); break; - case hrule: + case BLOCK_HRULE: strbuf_puts(html, "
    \n"); break; - case reference_def: + case BLOCK_REFERENCE_DEF: break; default: diff --git a/src/print.c b/src/print.c index 069d299..0ff86fa 100644 --- a/src/print.c +++ b/src/print.c @@ -45,20 +45,20 @@ extern void print_blocks(node_block* b, int indent) } switch(b->tag) { - case document: + case BLOCK_DOCUMENT: printf("document\n"); print_blocks(b->children, indent + 2); break; - case block_quote: + case BLOCK_BQUOTE: printf("block_quote\n"); print_blocks(b->children, indent + 2); break; - case list_item: + case BLOCK_LIST_ITEM: data = &(b->attributes.list_data); printf("list_item\n"); print_blocks(b->children, indent + 2); break; - case list: + case BLOCK_LIST: data = &(b->attributes.list_data); if (data->list_type == ordered) { printf("list (type=ordered tight=%s start=%d delim=%s)\n", @@ -72,27 +72,27 @@ extern void print_blocks(node_block* b, int indent) } print_blocks(b->children, indent + 2); break; - case atx_header: + case BLOCK_ATX_HEADER: printf("atx_header (level=%d)\n", b->attributes.header_level); print_inlines(b->inline_content, indent + 2); break; - case setext_header: + case BLOCK_SETEXT_HEADER: printf("setext_header (level=%d)\n", b->attributes.header_level); print_inlines(b->inline_content, indent + 2); break; - case paragraph: + case BLOCK_PARAGRAPH: printf("paragraph\n"); print_inlines(b->inline_content, indent + 2); break; - case hrule: + case BLOCK_HRULE: printf("hrule\n"); break; - case indented_code: + case BLOCK_INDENTED_CODE: printf("indented_code "); print_str(b->string_content.ptr, -1); putchar('\n'); break; - case fenced_code: + case BLOCK_FENCED_CODE: printf("fenced_code length=%d info=", b->attributes.fenced_code_data.fence_length); print_str(b->attributes.fenced_code_data.info.ptr, -1); @@ -100,12 +100,12 @@ extern void print_blocks(node_block* b, int indent) print_str(b->string_content.ptr, -1); putchar('\n'); break; - case html_block: + case BLOCK_HTML: printf("html_block "); print_str(b->string_content.ptr, -1); putchar('\n'); break; - case reference_def: + case BLOCK_REFERENCE_DEF: printf("reference_def\n"); break; default: diff --git a/src/stmd.h b/src/stmd.h index 957ab03..65063fa 100644 --- a/src/stmd.h +++ b/src/stmd.h @@ -67,30 +67,29 @@ struct FencedCodeData { strbuf info; }; -struct node_block { - enum { - document, - block_quote, - list, - list_item, - fenced_code, - indented_code, - html_block, - paragraph, - atx_header, - setext_header, - hrule, - reference_def +typedef struct Block { + enum { BLOCK_DOCUMENT, + BLOCK_BQUOTE, + BLOCK_LIST, + BLOCK_LIST_ITEM, + BLOCK_FENCED_CODE, + BLOCK_INDENTED_CODE, + BLOCK_HTML, + BLOCK_PARAGRAPH, + BLOCK_ATX_HEADER, + BLOCK_SETEXT_HEADER, + BLOCK_HRULE, + BLOCK_REFERENCE_DEF } tag; int start_line; int start_column; int end_line; bool open; bool last_line_blank; - struct node_block* children; - struct node_block* last_child; - struct node_block* parent; - struct node_block* top; + struct Block* children; + struct Block* last_child; + struct Block* parent; + struct Block* top; strbuf string_content; node_inl* inline_content; union { @@ -99,11 +98,9 @@ struct node_block { int header_level; reference** refmap; } attributes; - struct node_block * next; - struct node_block * prev; -}; - -typedef struct node_block node_block; + struct Block * next; + struct Block * prev; +} node_block; node_inl* parse_inlines(strbuf *input, reference** refmap); void free_inlines(node_inl* e); -- cgit v1.2.3 From 61e3e606e64221eaa5cf3d83dc598d5a42818d10 Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Sat, 6 Sep 2014 20:48:05 +0200 Subject: UTF8-aware detabbing and entity handling --- Makefile | 13 ++++++----- src/blocks.c | 35 ++++++------------------------ src/html/houdini.h | 2 ++ src/html/html.c | 1 - src/inlines.c | 63 ++++++++++++++++++++++-------------------------------- src/print.c | 5 ----- src/stmd.h | 3 --- src/utf8.c | 59 ++++++++++++++++++++++++++++++++++++++++++++------ 8 files changed, 95 insertions(+), 86 deletions(-) (limited to 'src/print.c') diff --git a/Makefile b/Makefile index 0d2eb8b..b5e487d 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ -CFLAGS=-g -O3 -Wall -Wextra -std=c99 -Isrc $(OPTFLAGS) -LDFLAGS=-g -O3 -Wall -Werror +CFLAGS=-g -pg -O3 -Wall -Wextra -std=c99 -Isrc $(OPTFLAGS) +LDFLAGS=-g -pg -O3 -Wall -Werror SRCDIR=src DATADIR=data @@ -41,11 +41,11 @@ testjs: spec.txt benchjs: node js/bench.js ${BENCHINP} -HTML_OBJ=$(SRCDIR)/html/html.o $(SRCDIR)/html/houdini_href_e.o $(SRCDIR)/html/houdini_html_e.o +HTML_OBJ=$(SRCDIR)/html/html.o $(SRCDIR)/html/houdini_href_e.o $(SRCDIR)/html/houdini_html_e.o $(SRCDIR)/html/houdini_html_u.o STMD_OBJ=$(SRCDIR)/inlines.o $(SRCDIR)/buffer.o $(SRCDIR)/blocks.o $(SRCDIR)/scanners.c $(SRCDIR)/print.o $(SRCDIR)/utf8.o -$(PROG): $(SRCDIR)/main.c $(HTML_OBJ) $(STMD_OBJ) - $(CC) $(LDFLAGS) -o $@ $^ +$(PROG): $(SRCDIR)/html/html_unescape.h $(SRCDIR)/case_fold_switch.inc $(HTML_OBJ) $(STMD_OBJ) $(SRCDIR)/main.c + $(CC) $(LDFLAGS) -o $@ $(HTML_OBJ) $(STMD_OBJ) $(SRCDIR)/main.c $(SRCDIR)/scanners.c: $(SRCDIR)/scanners.re re2c --case-insensitive -bis $< > $@ || (rm $@ && false) @@ -53,6 +53,9 @@ $(SRCDIR)/scanners.c: $(SRCDIR)/scanners.re $(SRCDIR)/case_fold_switch.inc: $(DATADIR)/CaseFolding-3.2.0.txt perl mkcasefold.pl < $< > $@ +$(SRCDIR)/html/html_unescape.h: $(SRCDIR)/html/html_unescape.gperf + gperf -I -t -N find_entity -H hash_entity -K entity -C -l --null-strings -m5 $< > $@ + .PHONY: leakcheck clean fuzztest dingus upload dingus: diff --git a/src/blocks.c b/src/blocks.c index f671b5e..8c7d49c 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -5,6 +5,8 @@ #include #include "stmd.h" +#include "utf8.h" +#include "html/houdini.h" #include "scanners.h" #include "uthash.h" @@ -184,7 +186,7 @@ static void finalize(node_block* b, int line_number) firstlinelen = strbuf_strchr(&b->string_content, '\n', 0); strbuf_init(&b->attributes.fenced_code_data.info, 0); - strbuf_set( + houdini_unescape_html_f( &b->attributes.fenced_code_data.info, b->string_content.ptr, firstlinelen @@ -369,31 +371,6 @@ static int lists_match(struct ListData list_data, list_data.bullet_char == item_data.bullet_char); } -static void expand_tabs(strbuf *ob, const unsigned char *line, size_t size) -{ - size_t i = 0, tab = 0; - - while (i < size) { - size_t org = i; - - while (i < size && line[i] != '\t') { - i++; tab++; - } - - if (i > org) - strbuf_put(ob, line + org, i - org); - - if (i >= size) - break; - - do { - strbuf_putc(ob, ' '); tab++; - } while (tab % 4); - - i++; - } -} - static node_block *finalize_document(node_block *document, int linenum) { while (document != document->top) { @@ -415,7 +392,7 @@ extern node_block *stmd_parse_file(FILE *f) node_block *document = make_document(); while (fgets((char *)buffer, sizeof(buffer), f)) { - expand_tabs(&line, buffer, strlen((char *)buffer)); + utf8proc_detab(&line, buffer, strlen((char *)buffer)); incorporate_line(&line, linenum, &document); strbuf_clear(&line); linenum++; @@ -436,10 +413,10 @@ extern node_block *stmd_parse_document(const unsigned char *buffer, size_t len) const unsigned char *eol = memchr(buffer, '\n', end - buffer); if (!eol) { - expand_tabs(&line, buffer, end - buffer); + utf8proc_detab(&line, buffer, end - buffer); buffer = end; } else { - expand_tabs(&line, buffer, (eol - buffer) + 1); + utf8proc_detab(&line, buffer, (eol - buffer) + 1); buffer += (eol - buffer) + 1; } diff --git a/src/html/houdini.h b/src/html/houdini.h index 1e54d20..5fd690d 100644 --- a/src/html/houdini.h +++ b/src/html/houdini.h @@ -25,9 +25,11 @@ extern "C" { #define HOUDINI_ESCAPED_SIZE(x) (((x) * 12) / 10) #define HOUDINI_UNESCAPED_SIZE(x) (x) +extern size_t houdini_unescape_ent(strbuf *ob, const uint8_t *src, size_t size); extern int houdini_escape_html(strbuf *ob, const uint8_t *src, size_t size); extern int houdini_escape_html0(strbuf *ob, const uint8_t *src, size_t size, int secure); extern int houdini_unescape_html(strbuf *ob, const uint8_t *src, size_t size); +extern void houdini_unescape_html_f(strbuf *ob, const uint8_t *src, size_t size); extern int houdini_escape_xml(strbuf *ob, const uint8_t *src, size_t size); extern int houdini_escape_uri(strbuf *ob, const uint8_t *src, size_t size); extern int houdini_escape_url(strbuf *ob, const uint8_t *src, size_t size); diff --git a/src/html/html.c b/src/html/html.c index 758ec80..595dfcd 100644 --- a/src/html/html.c +++ b/src/html/html.c @@ -166,7 +166,6 @@ void inlines_to_html(strbuf *html, node_inl* ils) break; case INL_RAW_HTML: - case INL_ENTITY: strbuf_put(html, ils->content.literal.data, ils->content.literal.len); diff --git a/src/inlines.c b/src/inlines.c index 6b17027..7b27150 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -5,6 +5,8 @@ #include #include "stmd.h" +#include "html/houdini.h" +#include "utf8.h" #include "uthash.h" #include "scanners.h" @@ -176,7 +178,6 @@ inline static node_inl* make_simple(int t) #define make_str(s) make_literal(INL_STRING, s) #define make_code(s) make_literal(INL_CODE, s) #define make_raw_html(s) make_literal(INL_RAW_HTML, s) -#define make_entity(s) make_literal(INL_ENTITY, s) #define make_linebreak() make_simple(INL_LINEBREAK) #define make_softbreak() make_simple(INL_SOFTBREAK) #define make_emph(contents) make_inlines(INL_EMPH, contents) @@ -191,7 +192,6 @@ extern void free_inlines(node_inl* e) case INL_STRING: case INL_RAW_HTML: case INL_CODE: - case INL_ENTITY: chunk_free(&e->content.literal); break; case INL_LINEBREAK: @@ -540,45 +540,34 @@ static node_inl* handle_backslash(subject *subj) // Assumes the subject has an '&' character at the current position. static node_inl* handle_entity(subject* subj) { - int match; - node_inl *result; - match = scan_entity(&subj->input, subj->pos); - if (match) { - result = make_entity(chunk_dup(&subj->input, subj->pos, match)); - subj->pos += match; - } else { - advance(subj); - result = make_str(chunk_literal("&")); - } - return result; + strbuf ent = GH_BUF_INIT; + size_t len; + + advance(subj); + + len = houdini_unescape_ent(&ent, + subj->input.data + subj->pos, + subj->input.len - subj->pos + ); + + if (len == 0) + return make_str(chunk_literal("&")); + + subj->pos += len; + return make_str(chunk_buf_detach(&ent)); } // Like make_str, but parses entities. // Returns an inline sequence consisting of str and entity elements. static node_inl *make_str_with_entities(chunk *content) { - node_inl *result = NULL; - node_inl *new; - int searchpos; - char c; - subject subj; - - subject_from_chunk(&subj, content, NULL); + strbuf unescaped = GH_BUF_INIT; - while ((c = peek_char(&subj))) { - switch (c) { - case '&': - new = handle_entity(&subj); - break; - default: - searchpos = chunk_strchr(&subj.input, '&', subj.pos); - new = make_str(chunk_dup(&subj.input, subj.pos, searchpos - subj.pos)); - subj.pos = searchpos; - } - result = append_inlines(result, new); + if (houdini_unescape_html(&unescaped, content->data, (size_t)content->len)) { + return make_str(chunk_buf_detach(&unescaped)); + } else { + return make_str(*content); } - - return result; } // Destructively unescape a string: remove backslashes before punctuation chars. @@ -611,9 +600,9 @@ static unsigned char *clean_url(chunk *url, int is_email) strbuf_puts(&buf, "mailto:"); if (url->data[0] == '<' && url->data[url->len - 1] == '>') { - strbuf_put(&buf, url->data + 1, url->len - 2); + houdini_unescape_html_f(&buf, url->data + 1, url->len - 2); } else { - strbuf_put(&buf, url->data, url->len); + houdini_unescape_html_f(&buf, url->data, url->len); } unescape_buffer(&buf); @@ -636,9 +625,9 @@ static unsigned char *clean_title(chunk *title) if ((first == '\'' && last == '\'') || (first == '(' && last == ')') || (first == '"' && last == '"')) { - strbuf_set(&buf, title->data + 1, title->len - 2); + houdini_unescape_html_f(&buf, title->data + 1, title->len - 2); } else { - strbuf_set(&buf, title->data, title->len); + houdini_unescape_html_f(&buf, title->data, title->len); } unescape_buffer(&buf); diff --git a/src/print.c b/src/print.c index 0ff86fa..9240dac 100644 --- a/src/print.c +++ b/src/print.c @@ -145,11 +145,6 @@ extern void print_inlines(node_inl* ils, int indent) print_str(ils->content.literal.data, ils->content.literal.len); putchar('\n'); break; - case INL_ENTITY: - printf("entity "); - print_str(ils->content.literal.data, ils->content.literal.len); - putchar('\n'); - break; case INL_LINK: case INL_IMAGE: printf("%s url=", ils->tag == INL_LINK ? "link" : "image"); diff --git a/src/stmd.h b/src/stmd.h index be65371..c80eeda 100644 --- a/src/stmd.h +++ b/src/stmd.h @@ -17,7 +17,6 @@ struct node_inl { INL_LINEBREAK, INL_CODE, INL_RAW_HTML, - INL_ENTITY, INL_EMPH, INL_STRONG, INL_LINK, @@ -133,6 +132,4 @@ void print_blocks(node_block* blk, int indent); void blocks_to_html(strbuf *html, node_block *b, bool tight); void inlines_to_html(strbuf *html, node_inl *b); -void utf8proc_case_fold(strbuf *dest, const unsigned char *str, int len); - #endif diff --git a/src/utf8.c b/src/utf8.c index cebd872..12d7ba5 100644 --- a/src/utf8.c +++ b/src/utf8.c @@ -3,7 +3,7 @@ #include #include -#include "stmd.h" +#include "utf8.h" static const int8_t utf8proc_utf8class[256] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -23,6 +23,12 @@ static const int8_t utf8proc_utf8class[256] = { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0 }; +static void encode_unknown(strbuf *buf) +{ + static const unsigned char repl[] = {239, 191, 189}; + strbuf_put(buf, repl, 3); +} + ssize_t utf8proc_charlen(const uint8_t *str, ssize_t str_len) { ssize_t length, i; @@ -46,6 +52,46 @@ ssize_t utf8proc_charlen(const uint8_t *str, ssize_t str_len) return length; } +void utf8proc_detab(strbuf *ob, const unsigned char *line, size_t size) +{ + static const unsigned char whitespace[] = " "; + + size_t i = 0, tab = 0; + + while (i < size) { + size_t org = i; + + while (i < size && line[i] != '\t' && line[i] <= 0x80) { + i++; tab++; + } + + if (i > org) + strbuf_put(ob, line + org, i - org); + + if (i >= size) + break; + + if (line[i] == '\t') { + int numspaces = 4 - (tab % 4); + strbuf_put(ob, whitespace, numspaces); + i += 1; + tab += numspaces; + } else { + ssize_t charlen = utf8proc_charlen(line + i, size - i); + + if (charlen < 0) { + encode_unknown(ob); + i++; + } else { + strbuf_put(ob, line + i, charlen); + i += charlen; + } + + tab += 1; + } + } +} + ssize_t utf8proc_iterate(const uint8_t *str, ssize_t str_len, int32_t *dst) { ssize_t length; @@ -89,9 +135,9 @@ void utf8proc_encode_char(int32_t uc, strbuf *buf) unsigned char dst[4]; int len = 0; - if (uc < 0x00) { - assert(false); - } else if (uc < 0x80) { + assert(uc >= 0); + + if (uc < 0x80) { dst[0] = uc; len = 1; } else if (uc < 0x800) { @@ -116,7 +162,8 @@ void utf8proc_encode_char(int32_t uc, strbuf *buf) dst[3] = 0x80 + (uc & 0x3F); len = 4; } else { - assert(false); + encode_unknown(buf); + return; } strbuf_put(buf, dst, len); @@ -133,7 +180,7 @@ void utf8proc_case_fold(strbuf *dest, const unsigned char *str, int len) ssize_t char_len = utf8proc_iterate(str, len, &c); if (char_len < 0) { - bufpush(0xFFFD); + encode_unknown(dest); continue; } -- cgit v1.2.3 From 7426f9ae60272a19bd4611b8579647118033a1e6 Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Sun, 7 Sep 2014 22:48:33 +0200 Subject: Abstract the Block union --- src/blocks.c | 94 +++++++++++++++++++++++++++------------------------------ src/html/html.c | 8 ++--- src/print.c | 11 +++---- src/stmd.h | 20 +++++++----- 4 files changed, 67 insertions(+), 66 deletions(-) (limited to 'src/print.c') diff --git a/src/blocks.c b/src/blocks.c index 8c7d49c..72b2dc2 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -18,35 +18,32 @@ static void finalize(node_block* b, int line_number); static node_block* make_block(int tag, int start_line, int start_column) { node_block* e; - e = (node_block*) malloc(sizeof(node_block)); + + e = malloc(sizeof(node_block)); + memset(e, 0x0, sizeof(*e)); + e->tag = tag; e->open = true; - e->last_line_blank = false; e->start_line = start_line; e->start_column = start_column; e->end_line = start_line; - e->children = NULL; - e->last_child = NULL; - e->parent = NULL; - e->top = NULL; - e->attributes.refmap = NULL; strbuf_init(&e->string_content, 32); - e->inline_content = NULL; - e->next = NULL; - e->prev = NULL; + return e; } // Create a root document node_block. extern node_block* make_document() { - node_block * e = make_block(BLOCK_DOCUMENT, 1, 1); - reference * map = NULL; + node_block *e = make_block(BLOCK_DOCUMENT, 1, 1); + reference *map = NULL; reference ** refmap; + refmap = (reference**) malloc(sizeof(reference*)); *refmap = map; - e->attributes.refmap = refmap; + e->as.document.refmap = refmap; e->top = e; + return e; } @@ -128,8 +125,8 @@ static bool ends_with_blank_line(node_block* node_block) // Break out of all containing lists static int break_out_of_lists(node_block ** bptr, int line_number) { - node_block * container = *bptr; - node_block * b = container->top; + node_block *container = *bptr; + node_block *b = container->top; // find first containing BLOCK_LIST: while (b && b->tag != BLOCK_LIST) { b = b->last_child; @@ -167,7 +164,7 @@ static void finalize(node_block* b, int line_number) case BLOCK_PARAGRAPH: pos = 0; while (strbuf_at(&b->string_content, 0) == '[' && - (pos = parse_reference(&b->string_content, b->top->attributes.refmap))) { + (pos = parse_reference(&b->string_content, b->top->as.document.refmap))) { strbuf_drop(&b->string_content, pos); } @@ -185,27 +182,27 @@ static void finalize(node_block* b, int line_number) // first line of contents becomes info firstlinelen = strbuf_strchr(&b->string_content, '\n', 0); - strbuf_init(&b->attributes.fenced_code_data.info, 0); + strbuf_init(&b->as.code.info, 0); houdini_unescape_html_f( - &b->attributes.fenced_code_data.info, + &b->as.code.info, b->string_content.ptr, firstlinelen ); strbuf_drop(&b->string_content, firstlinelen + 1); - strbuf_trim(&b->attributes.fenced_code_data.info); - unescape_buffer(&b->attributes.fenced_code_data.info); + strbuf_trim(&b->as.code.info); + unescape_buffer(&b->as.code.info); break; case BLOCK_LIST: // determine tight/loose status - b->attributes.list_data.tight = true; // tight by default + b->as.list.tight = true; // tight by default item = b->children; while (item) { // check for non-final non-empty list item ending with blank line: if (item->last_line_blank && item->next) { - b->attributes.list_data.tight = false; + b->as.list.tight = false; break; } // recurse into children of list item, to see if there are @@ -214,12 +211,12 @@ static void finalize(node_block* b, int line_number) while (subitem) { if (ends_with_blank_line(subitem) && (item->next || subitem->next)) { - b->attributes.list_data.tight = false; + b->as.list.tight = false; break; } subitem = subitem->next; } - if (!(b->attributes.list_data.tight)) { + if (!(b->as.list.tight)) { break; } item = item->next; @@ -269,9 +266,9 @@ extern void free_blocks(node_block* e) free_inlines(e->inline_content); strbuf_free(&e->string_content); if (e->tag == BLOCK_FENCED_CODE) { - strbuf_free(&e->attributes.fenced_code_data.info); + strbuf_free(&e->as.code.info); } else if (e->tag == BLOCK_DOCUMENT) { - free_reference_map(e->attributes.refmap); + free_reference_map(e->as.document.refmap); } free_blocks(e->children); free(e); @@ -362,13 +359,12 @@ static int parse_list_marker(chunk *input, int pos, struct ListData ** dataptr) } // Return 1 if list item belongs in list, else 0. -static int lists_match(struct ListData list_data, - struct ListData item_data) +static int lists_match(struct ListData *list_data, struct ListData *item_data) { - return (list_data.list_type == item_data.list_type && - list_data.delimiter == item_data.delimiter && - // list_data.marker_offset == item_data.marker_offset && - list_data.bullet_char == item_data.bullet_char); + return (list_data->list_type == item_data->list_type && + list_data->delimiter == item_data->delimiter && + // list_data->marker_offset == item_data.marker_offset && + list_data->bullet_char == item_data->bullet_char); } static node_block *finalize_document(node_block *document, int linenum) @@ -379,7 +375,7 @@ static node_block *finalize_document(node_block *document, int linenum) } finalize(document, linenum); - process_inlines(document, document->attributes.refmap); + process_inlines(document, document->as.document.refmap); return document; } @@ -496,10 +492,10 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr) } else if (container->tag == BLOCK_LIST_ITEM) { - if (indent >= container->attributes.list_data.marker_offset + - container->attributes.list_data.padding) { - offset += container->attributes.list_data.marker_offset + - container->attributes.list_data.padding; + if (indent >= container->as.list.marker_offset + + container->as.list.padding) { + offset += container->as.list.marker_offset + + container->as.list.padding; } else if (blank) { offset = first_nonspace; } else { @@ -525,7 +521,7 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr) } else if (container->tag == BLOCK_FENCED_CODE) { // skip optional spaces of fence offset - i = container->attributes.fenced_code_data.fence_offset; + i = container->as.code.fence_offset; while (i > 0 && peek_at(&input, offset) == ' ') { offset++; i--; @@ -598,14 +594,14 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr) level++; hashpos++; } - container->attributes.header_level = level; + container->as.header.level = level; } else if ((matched = scan_open_code_fence(&input, first_nonspace))) { container = add_child(container, BLOCK_FENCED_CODE, line_number, first_nonspace + 1); - container->attributes.fenced_code_data.fence_char = peek_at(&input, first_nonspace); - container->attributes.fenced_code_data.fence_length = matched; - container->attributes.fenced_code_data.fence_offset = first_nonspace - offset; + container->as.code.fence_char = peek_at(&input, first_nonspace); + container->as.code.fence_length = matched; + container->as.code.fence_offset = first_nonspace - offset; offset = first_nonspace + matched; } else if ((matched = scan_html_block_tag(&input, first_nonspace))) { @@ -620,7 +616,7 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr) strbuf_len(&container->string_content) - 2) < 0) { container->tag = BLOCK_SETEXT_HEADER; - container->attributes.header_level = lev; + container->as.header.level = lev; offset = input.len - 1; } else if (!(container->tag == BLOCK_PARAGRAPH && !all_matched) && @@ -657,19 +653,19 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr) data->marker_offset = indent; if (container->tag != BLOCK_LIST || - !lists_match(container->attributes.list_data, *data)) { + !lists_match(&container->as.list, data)) { container = add_child(container, BLOCK_LIST, line_number, first_nonspace + 1); - container->attributes.list_data = *data; + + memcpy(&container->as.list, data, sizeof(*data)); } // add the list item container = add_child(container, BLOCK_LIST_ITEM, line_number, first_nonspace + 1); /* TODO: static */ - container->attributes.list_data = *data; + memcpy(&container->as.list, data, sizeof(*data)); free(data); - } else { break; } @@ -732,9 +728,9 @@ static void incorporate_line(strbuf *line, int line_number, node_block** curptr) matched = 0; if (indent <= 3 && - peek_at(&input, first_nonspace) == container->attributes.fenced_code_data.fence_char) { + peek_at(&input, first_nonspace) == container->as.code.fence_char) { int fence_len = scan_close_code_fence(&input, first_nonspace); - if (fence_len > container->attributes.fenced_code_data.fence_length) + if (fence_len > container->as.code.fence_length) matched = 1; } diff --git a/src/html/html.c b/src/html/html.c index 595dfcd..129335f 100644 --- a/src/html/html.c +++ b/src/html/html.c @@ -72,7 +72,7 @@ void blocks_to_html(strbuf *html, node_block *b, bool tight) case BLOCK_LIST: // make sure a list starts at the beginning of the line: cr(html); - data = &(b->attributes.list_data); + data = &(b->as.list); if (data->start > 1) { strbuf_printf(html, "<%s start=\"%d\">\n", @@ -90,9 +90,9 @@ void blocks_to_html(strbuf *html, node_block *b, bool tight) case BLOCK_ATX_HEADER: case BLOCK_SETEXT_HEADER: cr(html); - strbuf_printf(html, "", b->attributes.header_level); + strbuf_printf(html, "", b->as.header.level); inlines_to_html(html, b->inline_content); - strbuf_printf(html, "\n", b->attributes.header_level); + strbuf_printf(html, "\n", b->as.header.level); break; case BLOCK_INDENTED_CODE: @@ -102,7 +102,7 @@ void blocks_to_html(strbuf *html, node_block *b, bool tight) strbuf_puts(html, "tag == BLOCK_FENCED_CODE) { - strbuf *info = &b->attributes.fenced_code_data.info; + strbuf *info = &b->as.code.info; if (strbuf_len(info) > 0) { int first_tag = strbuf_strchr(info, ' ', 0); diff --git a/src/print.c b/src/print.c index 9240dac..36140a8 100644 --- a/src/print.c +++ b/src/print.c @@ -54,12 +54,11 @@ extern void print_blocks(node_block* b, int indent) print_blocks(b->children, indent + 2); break; case BLOCK_LIST_ITEM: - data = &(b->attributes.list_data); printf("list_item\n"); print_blocks(b->children, indent + 2); break; case BLOCK_LIST: - data = &(b->attributes.list_data); + data = &(b->as.list); if (data->list_type == ordered) { printf("list (type=ordered tight=%s start=%d delim=%s)\n", (data->tight ? "true" : "false"), @@ -73,11 +72,11 @@ extern void print_blocks(node_block* b, int indent) print_blocks(b->children, indent + 2); break; case BLOCK_ATX_HEADER: - printf("atx_header (level=%d)\n", b->attributes.header_level); + printf("atx_header (level=%d)\n", b->as.header.level); print_inlines(b->inline_content, indent + 2); break; case BLOCK_SETEXT_HEADER: - printf("setext_header (level=%d)\n", b->attributes.header_level); + printf("setext_header (level=%d)\n", b->as.header.level); print_inlines(b->inline_content, indent + 2); break; case BLOCK_PARAGRAPH: @@ -94,8 +93,8 @@ extern void print_blocks(node_block* b, int indent) break; case BLOCK_FENCED_CODE: printf("fenced_code length=%d info=", - b->attributes.fenced_code_data.fence_length); - print_str(b->attributes.fenced_code_data.info.ptr, -1); + b->as.code.fence_length); + print_str(b->as.code.info.ptr, -1); putchar(' '); print_str(b->string_content.ptr, -1); putchar('\n'); diff --git a/src/stmd.h b/src/stmd.h index c80eeda..21a86b0 100644 --- a/src/stmd.h +++ b/src/stmd.h @@ -96,14 +96,20 @@ struct node_block { struct node_block* top; strbuf string_content; node_inl* inline_content; + union { - struct ListData list_data; - struct FencedCodeData fenced_code_data; - int header_level; - reference** refmap; - } attributes; - struct node_block * next; - struct node_block * prev; + struct ListData list; + struct FencedCodeData code; + struct { + int level; + } header; + struct { + reference** refmap; + } document; + } as; + + struct node_block *next; + struct node_block *prev; }; typedef struct node_block node_block; -- cgit v1.2.3 From 118e3d3c39242225baa876319cdbfbb1adadc77b Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Mon, 15 Sep 2014 15:28:49 +0200 Subject: Cleanup external APIs --- src/blocks.c | 11 ++-- src/html/html.c | 163 ++++++++++++++++++++++++++++--------------------------- src/inlines.c | 1 + src/main.c | 8 +-- src/print.c | 114 +++++++++++++++++++------------------- src/references.c | 1 + src/stmd.h | 26 ++------- 7 files changed, 159 insertions(+), 165 deletions(-) (limited to 'src/print.c') diff --git a/src/blocks.c b/src/blocks.c index 30a8284..2ac7032 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -6,8 +6,9 @@ #include "stmd.h" #include "utf8.h" -#include "html/houdini.h" #include "scanners.h" +#include "inlines.h" +#include "html/houdini.h" #define peek_at(i, n) (i)->data[n] @@ -224,7 +225,7 @@ static void finalize(node_block* b, int line_number) } // Add a node_block as child of another. Return pointer to child. -extern node_block* add_child(node_block* parent, +static node_block* add_child(node_block* parent, int block_type, int start_line, int start_column) { assert(parent); @@ -252,7 +253,7 @@ extern node_block* add_child(node_block* parent, } // Free a node_block list and any children. -extern void free_blocks(node_block* e) +void stmd_free_nodes(node_block *e) { node_block * next; while (e != NULL) { @@ -264,7 +265,7 @@ extern void free_blocks(node_block* e) } else if (e->tag == BLOCK_DOCUMENT) { reference_map_free(e->as.document.refmap); } - free_blocks(e->children); + stmd_free_nodes(e->children); free(e); e = next; } @@ -279,8 +280,6 @@ void process_inlines(node_block* cur, reference_map *refmap) case BLOCK_ATX_HEADER: case BLOCK_SETEXT_HEADER: cur->inline_content = parse_inlines(&cur->string_content, refmap); - // MEM - // strbuf_free(&cur->string_content); break; default: diff --git a/src/html/html.c b/src/html/html.c index b48b10b..6f3bc76 100644 --- a/src/html/html.c +++ b/src/html/html.c @@ -32,8 +32,89 @@ static inline void cr(strbuf *html) strbuf_putc(html, '\n'); } +// Convert an inline list to HTML. Returns 0 on success, and sets result. +static void inlines_to_html(strbuf *html, node_inl* ils) +{ + strbuf scrap = GH_BUF_INIT; + + while(ils != NULL) { + switch(ils->tag) { + case INL_STRING: + escape_html(html, ils->content.literal.data, ils->content.literal.len); + break; + + case INL_LINEBREAK: + strbuf_puts(html, "
    \n"); + break; + + case INL_SOFTBREAK: + strbuf_putc(html, '\n'); + break; + + case INL_CODE: + strbuf_puts(html, ""); + escape_html(html, ils->content.literal.data, ils->content.literal.len); + strbuf_puts(html, ""); + break; + + case INL_RAW_HTML: + strbuf_put(html, + ils->content.literal.data, + ils->content.literal.len); + break; + + case INL_LINK: + strbuf_puts(html, "
    content.linkable.url) + escape_href(html, ils->content.linkable.url, -1); + + if (ils->content.linkable.title) { + strbuf_puts(html, "\" title=\""); + escape_html(html, ils->content.linkable.title, -1); + } + + strbuf_puts(html, "\">"); + inlines_to_html(html, ils->content.inlines); + strbuf_puts(html, ""); + break; + + case INL_IMAGE: + strbuf_puts(html, "content.linkable.url) + escape_href(html, ils->content.linkable.url, -1); + + inlines_to_html(&scrap, ils->content.inlines); + strbuf_puts(html, "\" alt=\""); + if (scrap.size) + escape_html(html, scrap.ptr, scrap.size); + strbuf_clear(&scrap); + + if (ils->content.linkable.title) { + strbuf_puts(html, "\" title=\""); + escape_html(html, ils->content.linkable.title, -1); + } + + strbuf_puts(html, "\"/>"); + break; + + case INL_STRONG: + strbuf_puts(html, ""); + inlines_to_html(html, ils->content.inlines); + strbuf_puts(html, ""); + break; + + case INL_EMPH: + strbuf_puts(html, ""); + inlines_to_html(html, ils->content.inlines); + strbuf_puts(html, ""); + break; + } + ils = ils->next; + } +} + // Convert a node_block list to HTML. Returns 0 on success, and sets result. -void blocks_to_html(strbuf *html, node_block *b, bool tight) +static void blocks_to_html(strbuf *html, node_block *b, bool tight) { struct ListData *data; @@ -139,83 +220,7 @@ void blocks_to_html(strbuf *html, node_block *b, bool tight) } } -// Convert an inline list to HTML. Returns 0 on success, and sets result. -void inlines_to_html(strbuf *html, node_inl* ils) +void stmd_render_html(strbuf *html, node_block *root) { - strbuf scrap = GH_BUF_INIT; - - while(ils != NULL) { - switch(ils->tag) { - case INL_STRING: - escape_html(html, ils->content.literal.data, ils->content.literal.len); - break; - - case INL_LINEBREAK: - strbuf_puts(html, "
    \n"); - break; - - case INL_SOFTBREAK: - strbuf_putc(html, '\n'); - break; - - case INL_CODE: - strbuf_puts(html, ""); - escape_html(html, ils->content.literal.data, ils->content.literal.len); - strbuf_puts(html, ""); - break; - - case INL_RAW_HTML: - strbuf_put(html, - ils->content.literal.data, - ils->content.literal.len); - break; - - case INL_LINK: - strbuf_puts(html, "content.linkable.url) - escape_href(html, ils->content.linkable.url, -1); - - if (ils->content.linkable.title) { - strbuf_puts(html, "\" title=\""); - escape_html(html, ils->content.linkable.title, -1); - } - - strbuf_puts(html, "\">"); - inlines_to_html(html, ils->content.inlines); - strbuf_puts(html, ""); - break; - - case INL_IMAGE: - strbuf_puts(html, "content.linkable.url) - escape_href(html, ils->content.linkable.url, -1); - - inlines_to_html(&scrap, ils->content.inlines); - strbuf_puts(html, "\" alt=\""); - if (scrap.size) - escape_html(html, scrap.ptr, scrap.size); - strbuf_clear(&scrap); - - if (ils->content.linkable.title) { - strbuf_puts(html, "\" title=\""); - escape_html(html, ils->content.linkable.title, -1); - } - - strbuf_puts(html, "\"/>"); - break; - - case INL_STRONG: - strbuf_puts(html, ""); - inlines_to_html(html, ils->content.inlines); - strbuf_puts(html, ""); - break; - - case INL_EMPH: - strbuf_puts(html, ""); - inlines_to_html(html, ils->content.inlines); - strbuf_puts(html, ""); - break; - } - ils = ils->next; - } + blocks_to_html(html, root, false); } diff --git a/src/inlines.c b/src/inlines.c index cd2d124..145825c 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -8,6 +8,7 @@ #include "html/houdini.h" #include "utf8.h" #include "scanners.h" +#include "inlines.h" typedef struct Subject { chunk input; diff --git a/src/main.c b/src/main.c index 90bb16d..76a0e12 100644 --- a/src/main.c +++ b/src/main.c @@ -17,9 +17,9 @@ static void print_document(node_block *document, bool ast) strbuf html = GH_BUF_INIT; if (ast) { - print_blocks(document, 0); + stmd_debug_print(document); } else { - blocks_to_html(&html, document, false); + stmd_render_html(&html, document); printf("%s", html.ptr); strbuf_free(&html); } @@ -54,7 +54,7 @@ int main(int argc, char *argv[]) if (numfps == 0) { document = stmd_parse_file(stdin); print_document(document, ast); - free_blocks(document); + stmd_free_nodes(document); } else { for (i = 0; i < numfps; i++) { FILE *fp = fopen(argv[files[i]], "r"); @@ -67,7 +67,7 @@ int main(int argc, char *argv[]) document = stmd_parse_file(fp); print_document(document, ast); - free_blocks(document); + stmd_free_nodes(document); fclose(fp); } } diff --git a/src/print.c b/src/print.c index 36140a8..83f8daa 100644 --- a/src/print.c +++ b/src/print.c @@ -32,14 +32,69 @@ static void print_str(const unsigned char *s, int len) putchar('"'); } +// Prettyprint an inline list, for debugging. +static void print_inlines(node_inl* ils, int indent) +{ + while(ils != NULL) { + for (int i=0; i < indent; i++) { + putchar(' '); + } + switch(ils->tag) { + case INL_STRING: + printf("str "); + print_str(ils->content.literal.data, ils->content.literal.len); + putchar('\n'); + break; + case INL_LINEBREAK: + printf("linebreak\n"); + break; + case INL_SOFTBREAK: + printf("softbreak\n"); + break; + case INL_CODE: + printf("code "); + print_str(ils->content.literal.data, ils->content.literal.len); + putchar('\n'); + break; + case INL_RAW_HTML: + printf("html "); + print_str(ils->content.literal.data, ils->content.literal.len); + putchar('\n'); + break; + case INL_LINK: + case INL_IMAGE: + printf("%s url=", ils->tag == INL_LINK ? "link" : "image"); + + if (ils->content.linkable.url) + print_str(ils->content.linkable.url, -1); + + if (ils->content.linkable.title) { + printf(" title="); + print_str(ils->content.linkable.title, -1); + } + putchar('\n'); + print_inlines(ils->content.linkable.label, indent + 2); + break; + case INL_STRONG: + printf("strong\n"); + print_inlines(ils->content.linkable.label, indent + 2); + break; + case INL_EMPH: + printf("emph\n"); + print_inlines(ils->content.linkable.label, indent + 2); + break; + } + ils = ils->next; + } +} + // Functions to pretty-print inline and node_block lists, for debugging. // Prettyprint an inline list, for debugging. -extern void print_blocks(node_block* b, int indent) +static void print_blocks(node_block* b, int indent) { struct ListData *data; while(b != NULL) { - // printf("%3d %3d %3d| ", b->start_line, b->start_column, b->end_line); for (int i=0; i < indent; i++) { putchar(' '); } @@ -115,58 +170,7 @@ extern void print_blocks(node_block* b, int indent) } } -// Prettyprint an inline list, for debugging. -extern void print_inlines(node_inl* ils, int indent) +void stmd_debug_print(node_block *root) { - while(ils != NULL) { - for (int i=0; i < indent; i++) { - putchar(' '); - } - switch(ils->tag) { - case INL_STRING: - printf("str "); - print_str(ils->content.literal.data, ils->content.literal.len); - putchar('\n'); - break; - case INL_LINEBREAK: - printf("linebreak\n"); - break; - case INL_SOFTBREAK: - printf("softbreak\n"); - break; - case INL_CODE: - printf("code "); - print_str(ils->content.literal.data, ils->content.literal.len); - putchar('\n'); - break; - case INL_RAW_HTML: - printf("html "); - print_str(ils->content.literal.data, ils->content.literal.len); - putchar('\n'); - break; - case INL_LINK: - case INL_IMAGE: - printf("%s url=", ils->tag == INL_LINK ? "link" : "image"); - - if (ils->content.linkable.url) - print_str(ils->content.linkable.url, -1); - - if (ils->content.linkable.title) { - printf(" title="); - print_str(ils->content.linkable.title, -1); - } - putchar('\n'); - print_inlines(ils->content.linkable.label, indent + 2); - break; - case INL_STRONG: - printf("strong\n"); - print_inlines(ils->content.linkable.label, indent + 2); - break; - case INL_EMPH: - printf("emph\n"); - print_inlines(ils->content.linkable.label, indent + 2); - break; - } - ils = ils->next; - } + print_blocks(root, 0); } diff --git a/src/references.c b/src/references.c index 300bbcc..3e54b48 100644 --- a/src/references.c +++ b/src/references.c @@ -1,6 +1,7 @@ #include "stmd.h" #include "utf8.h" #include "references.h" +#include "inlines.h" static unsigned int refhash(const unsigned char *link_ref) diff --git a/src/stmd.h b/src/stmd.h index 4e21e6c..c6473a6 100644 --- a/src/stmd.h +++ b/src/stmd.h @@ -104,28 +104,12 @@ struct node_block { typedef struct node_block node_block; -node_inl* parse_inlines(strbuf *input, reference_map *refmap); -void free_inlines(node_inl* e); +node_block *stmd_parse_document(const unsigned char *buffer, size_t len); +node_block *stmd_parse_file(FILE *f); -int parse_reference_inline(strbuf *input, reference_map *refmap); -void unescape_buffer(strbuf *buf); +void stmd_free_nodes(node_block *e); -extern node_block* make_document(); -extern node_block* add_child(node_block* parent, - int block_type, int start_line, int start_column); -void free_blocks(node_block* e); - -extern node_block *stmd_parse_document(const unsigned char *buffer, size_t len); -extern node_block *stmd_parse_file(FILE *f); - -void print_inlines(node_inl* ils, int indent); -void print_blocks(node_block* blk, int indent); - -void blocks_to_html(strbuf *html, node_block *b, bool tight); -void inlines_to_html(strbuf *html, node_inl *b); - -unsigned char *clean_url(chunk *url); -unsigned char *clean_autolink(chunk *url, int is_email); -unsigned char *clean_title(chunk *title); +void stmd_debug_print(node_block *root); +void stmd_render_html(strbuf *html, node_block *root); #endif -- cgit v1.2.3