From 94a79a605f3e76a43f1f87a5044f6761b99e5ca5 Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Wed, 10 Sep 2014 18:33:27 +0200 Subject: Cleanup reference implementation --- src/references.c | 109 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 src/references.c (limited to 'src/references.c') diff --git a/src/references.c b/src/references.c new file mode 100644 index 0000000..ff64b00 --- /dev/null +++ b/src/references.c @@ -0,0 +1,109 @@ +#include "stmd.h" +#include "utf8.h" +#include "references.h" + +static unsigned int +refhash(const unsigned char *link_ref) +{ + unsigned int hash = 0; + + while (*link_ref) + hash = (*link_ref++) + (hash << 6) + (hash << 16) - hash; + + return hash; +} + +// normalize reference: collapse internal whitespace to single space, +// remove leading/trailing whitespace, case fold +static unsigned char *normalize_reference(chunk *ref) +{ + strbuf normalized = GH_BUF_INIT; + + utf8proc_case_fold(&normalized, ref->data, ref->len); + strbuf_trim(&normalized); + strbuf_normalize_whitespace(&normalized); + + return strbuf_detach(&normalized); +} + +static void add_reference(reference_map *map, reference* ref) +{ + ref->next = map->table[ref->hash % REFMAP_SIZE]; + map->table[ref->hash % REFMAP_SIZE] = ref; +} + +extern reference *reference_create(reference_map *map, chunk *label, chunk *url, chunk *title) +{ + reference *ref; + ref = malloc(sizeof(reference)); + ref->label = normalize_reference(label); + ref->hash = refhash(ref->label); + ref->url = clean_url(url); + ref->title = clean_title(title); + ref->next = NULL; + + add_reference(map, ref); + + return ref; +} + +// Returns reference if refmap contains a reference with matching +// label, otherwise NULL. +reference* reference_lookup(reference_map *map, chunk *label) +{ + reference *ref = NULL; + unsigned char *norm; + unsigned int hash; + + if (map == NULL) + return NULL; + + norm = normalize_reference(label); + hash = refhash(norm); + ref = map->table[hash % REFMAP_SIZE]; + + while (ref) { + if (ref->label[0] == norm[0] && + !strcmp((char *)ref->label, (char *)norm)) + break; + ref = ref->next; + } + + free(norm); + return ref; +} + +static void reference_free(reference *ref) +{ + free(ref->label); + free(ref->url); + free(ref->title); + free(ref); +} + +void reference_map_free(reference_map *map) +{ + unsigned int i; + + for (i = 0; i < REFMAP_SIZE; ++i) { + reference *ref = map->table[i]; + reference *next; + + while (ref) { + next = ref->next; + reference_free(ref); + ref = next; + } + } + + free(map->table); + free(map); +} + +reference_map *reference_map_new(void) +{ + reference_map *map = malloc(sizeof(reference_map)); + memset(map, 0x0, sizeof(reference_map)); + return map; +} + -- cgit v1.2.3 From c04e1e7aef06ce0836984b17e48a1d09bb83ce04 Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Wed, 10 Sep 2014 18:38:56 +0200 Subject: Fix misc bugs --- src/references.c | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) (limited to 'src/references.c') diff --git a/src/references.c b/src/references.c index ff64b00..84cb773 100644 --- a/src/references.c +++ b/src/references.c @@ -13,6 +13,14 @@ refhash(const unsigned char *link_ref) return hash; } +static void reference_free(reference *ref) +{ + free(ref->label); + free(ref->url); + free(ref->title); + free(ref); +} + // normalize reference: collapse internal whitespace to single space, // remove leading/trailing whitespace, case fold static unsigned char *normalize_reference(chunk *ref) @@ -28,7 +36,18 @@ static unsigned char *normalize_reference(chunk *ref) static void add_reference(reference_map *map, reference* ref) { - ref->next = map->table[ref->hash % REFMAP_SIZE]; + reference *t = ref->next = map->table[ref->hash % REFMAP_SIZE]; + + while (t) { + if (t->hash == ref->hash && + !strcmp((char *)t->label, (char *)ref->label)) { + reference_free(ref); + return; + } + + t = t->next; + } + map->table[ref->hash % REFMAP_SIZE] = ref; } @@ -63,7 +82,7 @@ reference* reference_lookup(reference_map *map, chunk *label) ref = map->table[hash % REFMAP_SIZE]; while (ref) { - if (ref->label[0] == norm[0] && + if (ref->hash == hash && !strcmp((char *)ref->label, (char *)norm)) break; ref = ref->next; @@ -73,14 +92,6 @@ reference* reference_lookup(reference_map *map, chunk *label) return ref; } -static void reference_free(reference *ref) -{ - free(ref->label); - free(ref->url); - free(ref->title); - free(ref); -} - void reference_map_free(reference_map *map) { unsigned int i; @@ -96,7 +107,6 @@ void reference_map_free(reference_map *map) } } - free(map->table); free(map); } -- cgit v1.2.3 From 8c028e1a88c2d2aac4a4086202568bee43678aa8 Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Wed, 10 Sep 2014 19:50:29 +0200 Subject: Do not create references with empty names --- src/buffer.c | 7 ++++--- src/references.c | 31 ++++++++++++++++++++++++++----- src/references.h | 2 +- 3 files changed, 31 insertions(+), 9 deletions(-) (limited to 'src/references.c') diff --git a/src/buffer.c b/src/buffer.c index cdf8ca0..7c2b86b 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -215,11 +215,12 @@ unsigned char *strbuf_detach(strbuf *buf) { unsigned char *data = buf->ptr; - if (buf->asize == 0 || buf->ptr == strbuf__oom) - return NULL; + if (buf->asize == 0 || buf->ptr == strbuf__oom) { + /* return an empty string */ + return calloc(1, 1); + } strbuf_init(buf, 0); - return data; } diff --git a/src/references.c b/src/references.c index 84cb773..300bbcc 100644 --- a/src/references.c +++ b/src/references.c @@ -23,15 +23,29 @@ static void reference_free(reference *ref) // normalize reference: collapse internal whitespace to single space, // remove leading/trailing whitespace, case fold +// Return NULL if the reference name is actually empty (i.e. composed +// solely from whitespace) static unsigned char *normalize_reference(chunk *ref) { strbuf normalized = GH_BUF_INIT; + unsigned char *result; + + if (ref->len == 0) + return NULL; utf8proc_case_fold(&normalized, ref->data, ref->len); strbuf_trim(&normalized); strbuf_normalize_whitespace(&normalized); - return strbuf_detach(&normalized); + result = strbuf_detach(&normalized); + assert(result); + + if (result[0] == '\0') { + free(result); + return NULL; + } + + return result; } static void add_reference(reference_map *map, reference* ref) @@ -51,19 +65,23 @@ static void add_reference(reference_map *map, reference* ref) map->table[ref->hash % REFMAP_SIZE] = ref; } -extern reference *reference_create(reference_map *map, chunk *label, chunk *url, chunk *title) +extern void reference_create(reference_map *map, chunk *label, chunk *url, chunk *title) { reference *ref; + unsigned char *reflabel = normalize_reference(label); + + /* empty reference name, or composed from only whitespace */ + if (reflabel == NULL) + return; + ref = malloc(sizeof(reference)); - ref->label = normalize_reference(label); + ref->label = reflabel; ref->hash = refhash(ref->label); ref->url = clean_url(url); ref->title = clean_title(title); ref->next = NULL; add_reference(map, ref); - - return ref; } // Returns reference if refmap contains a reference with matching @@ -78,6 +96,9 @@ reference* reference_lookup(reference_map *map, chunk *label) return NULL; norm = normalize_reference(label); + if (norm == NULL) + return NULL; + hash = refhash(norm); ref = map->table[hash % REFMAP_SIZE]; diff --git a/src/references.h b/src/references.h index 78fffe7..28937f1 100644 --- a/src/references.h +++ b/src/references.h @@ -22,6 +22,6 @@ typedef struct reference_map reference_map; reference_map *reference_map_new(void); void reference_map_free(reference_map *map); reference* reference_lookup(reference_map *map, chunk *label); -extern reference *reference_create(reference_map *map, chunk *label, chunk *url, chunk *title); +extern void reference_create(reference_map *map, chunk *label, chunk *url, chunk *title); #endif -- cgit v1.2.3 From 118e3d3c39242225baa876319cdbfbb1adadc77b Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Mon, 15 Sep 2014 15:28:49 +0200 Subject: Cleanup external APIs --- src/blocks.c | 11 ++-- src/html/html.c | 163 ++++++++++++++++++++++++++++--------------------------- src/inlines.c | 1 + src/main.c | 8 +-- src/print.c | 114 +++++++++++++++++++------------------- src/references.c | 1 + src/stmd.h | 26 ++------- 7 files changed, 159 insertions(+), 165 deletions(-) (limited to 'src/references.c') diff --git a/src/blocks.c b/src/blocks.c index 30a8284..2ac7032 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -6,8 +6,9 @@ #include "stmd.h" #include "utf8.h" -#include "html/houdini.h" #include "scanners.h" +#include "inlines.h" +#include "html/houdini.h" #define peek_at(i, n) (i)->data[n] @@ -224,7 +225,7 @@ static void finalize(node_block* b, int line_number) } // Add a node_block as child of another. Return pointer to child. -extern node_block* add_child(node_block* parent, +static node_block* add_child(node_block* parent, int block_type, int start_line, int start_column) { assert(parent); @@ -252,7 +253,7 @@ extern node_block* add_child(node_block* parent, } // Free a node_block list and any children. -extern void free_blocks(node_block* e) +void stmd_free_nodes(node_block *e) { node_block * next; while (e != NULL) { @@ -264,7 +265,7 @@ extern void free_blocks(node_block* e) } else if (e->tag == BLOCK_DOCUMENT) { reference_map_free(e->as.document.refmap); } - free_blocks(e->children); + stmd_free_nodes(e->children); free(e); e = next; } @@ -279,8 +280,6 @@ void process_inlines(node_block* cur, reference_map *refmap) case BLOCK_ATX_HEADER: case BLOCK_SETEXT_HEADER: cur->inline_content = parse_inlines(&cur->string_content, refmap); - // MEM - // strbuf_free(&cur->string_content); break; default: diff --git a/src/html/html.c b/src/html/html.c index b48b10b..6f3bc76 100644 --- a/src/html/html.c +++ b/src/html/html.c @@ -32,8 +32,89 @@ static inline void cr(strbuf *html) strbuf_putc(html, '\n'); } +// Convert an inline list to HTML. Returns 0 on success, and sets result. +static void inlines_to_html(strbuf *html, node_inl* ils) +{ + strbuf scrap = GH_BUF_INIT; + + while(ils != NULL) { + switch(ils->tag) { + case INL_STRING: + escape_html(html, ils->content.literal.data, ils->content.literal.len); + break; + + case INL_LINEBREAK: + strbuf_puts(html, "
\n"); + break; + + case INL_SOFTBREAK: + strbuf_putc(html, '\n'); + break; + + case INL_CODE: + strbuf_puts(html, ""); + escape_html(html, ils->content.literal.data, ils->content.literal.len); + strbuf_puts(html, ""); + break; + + case INL_RAW_HTML: + strbuf_put(html, + ils->content.literal.data, + ils->content.literal.len); + break; + + case INL_LINK: + strbuf_puts(html, "content.linkable.url) + escape_href(html, ils->content.linkable.url, -1); + + if (ils->content.linkable.title) { + strbuf_puts(html, "\" title=\""); + escape_html(html, ils->content.linkable.title, -1); + } + + strbuf_puts(html, "\">"); + inlines_to_html(html, ils->content.inlines); + strbuf_puts(html, ""); + break; + + case INL_IMAGE: + strbuf_puts(html, "content.linkable.url) + escape_href(html, ils->content.linkable.url, -1); + + inlines_to_html(&scrap, ils->content.inlines); + strbuf_puts(html, "\" alt=\""); + if (scrap.size) + escape_html(html, scrap.ptr, scrap.size); + strbuf_clear(&scrap); + + if (ils->content.linkable.title) { + strbuf_puts(html, "\" title=\""); + escape_html(html, ils->content.linkable.title, -1); + } + + strbuf_puts(html, "\"/>"); + break; + + case INL_STRONG: + strbuf_puts(html, ""); + inlines_to_html(html, ils->content.inlines); + strbuf_puts(html, ""); + break; + + case INL_EMPH: + strbuf_puts(html, ""); + inlines_to_html(html, ils->content.inlines); + strbuf_puts(html, ""); + break; + } + ils = ils->next; + } +} + // Convert a node_block list to HTML. Returns 0 on success, and sets result. -void blocks_to_html(strbuf *html, node_block *b, bool tight) +static void blocks_to_html(strbuf *html, node_block *b, bool tight) { struct ListData *data; @@ -139,83 +220,7 @@ void blocks_to_html(strbuf *html, node_block *b, bool tight) } } -// Convert an inline list to HTML. Returns 0 on success, and sets result. -void inlines_to_html(strbuf *html, node_inl* ils) +void stmd_render_html(strbuf *html, node_block *root) { - strbuf scrap = GH_BUF_INIT; - - while(ils != NULL) { - switch(ils->tag) { - case INL_STRING: - escape_html(html, ils->content.literal.data, ils->content.literal.len); - break; - - case INL_LINEBREAK: - strbuf_puts(html, "
\n"); - break; - - case INL_SOFTBREAK: - strbuf_putc(html, '\n'); - break; - - case INL_CODE: - strbuf_puts(html, ""); - escape_html(html, ils->content.literal.data, ils->content.literal.len); - strbuf_puts(html, ""); - break; - - case INL_RAW_HTML: - strbuf_put(html, - ils->content.literal.data, - ils->content.literal.len); - break; - - case INL_LINK: - strbuf_puts(html, "content.linkable.url) - escape_href(html, ils->content.linkable.url, -1); - - if (ils->content.linkable.title) { - strbuf_puts(html, "\" title=\""); - escape_html(html, ils->content.linkable.title, -1); - } - - strbuf_puts(html, "\">"); - inlines_to_html(html, ils->content.inlines); - strbuf_puts(html, ""); - break; - - case INL_IMAGE: - strbuf_puts(html, "content.linkable.url) - escape_href(html, ils->content.linkable.url, -1); - - inlines_to_html(&scrap, ils->content.inlines); - strbuf_puts(html, "\" alt=\""); - if (scrap.size) - escape_html(html, scrap.ptr, scrap.size); - strbuf_clear(&scrap); - - if (ils->content.linkable.title) { - strbuf_puts(html, "\" title=\""); - escape_html(html, ils->content.linkable.title, -1); - } - - strbuf_puts(html, "\"/>"); - break; - - case INL_STRONG: - strbuf_puts(html, ""); - inlines_to_html(html, ils->content.inlines); - strbuf_puts(html, ""); - break; - - case INL_EMPH: - strbuf_puts(html, ""); - inlines_to_html(html, ils->content.inlines); - strbuf_puts(html, ""); - break; - } - ils = ils->next; - } + blocks_to_html(html, root, false); } diff --git a/src/inlines.c b/src/inlines.c index cd2d124..145825c 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -8,6 +8,7 @@ #include "html/houdini.h" #include "utf8.h" #include "scanners.h" +#include "inlines.h" typedef struct Subject { chunk input; diff --git a/src/main.c b/src/main.c index 90bb16d..76a0e12 100644 --- a/src/main.c +++ b/src/main.c @@ -17,9 +17,9 @@ static void print_document(node_block *document, bool ast) strbuf html = GH_BUF_INIT; if (ast) { - print_blocks(document, 0); + stmd_debug_print(document); } else { - blocks_to_html(&html, document, false); + stmd_render_html(&html, document); printf("%s", html.ptr); strbuf_free(&html); } @@ -54,7 +54,7 @@ int main(int argc, char *argv[]) if (numfps == 0) { document = stmd_parse_file(stdin); print_document(document, ast); - free_blocks(document); + stmd_free_nodes(document); } else { for (i = 0; i < numfps; i++) { FILE *fp = fopen(argv[files[i]], "r"); @@ -67,7 +67,7 @@ int main(int argc, char *argv[]) document = stmd_parse_file(fp); print_document(document, ast); - free_blocks(document); + stmd_free_nodes(document); fclose(fp); } } diff --git a/src/print.c b/src/print.c index 36140a8..83f8daa 100644 --- a/src/print.c +++ b/src/print.c @@ -32,14 +32,69 @@ static void print_str(const unsigned char *s, int len) putchar('"'); } +// Prettyprint an inline list, for debugging. +static void print_inlines(node_inl* ils, int indent) +{ + while(ils != NULL) { + for (int i=0; i < indent; i++) { + putchar(' '); + } + switch(ils->tag) { + case INL_STRING: + printf("str "); + print_str(ils->content.literal.data, ils->content.literal.len); + putchar('\n'); + break; + case INL_LINEBREAK: + printf("linebreak\n"); + break; + case INL_SOFTBREAK: + printf("softbreak\n"); + break; + case INL_CODE: + printf("code "); + print_str(ils->content.literal.data, ils->content.literal.len); + putchar('\n'); + break; + case INL_RAW_HTML: + printf("html "); + print_str(ils->content.literal.data, ils->content.literal.len); + putchar('\n'); + break; + case INL_LINK: + case INL_IMAGE: + printf("%s url=", ils->tag == INL_LINK ? "link" : "image"); + + if (ils->content.linkable.url) + print_str(ils->content.linkable.url, -1); + + if (ils->content.linkable.title) { + printf(" title="); + print_str(ils->content.linkable.title, -1); + } + putchar('\n'); + print_inlines(ils->content.linkable.label, indent + 2); + break; + case INL_STRONG: + printf("strong\n"); + print_inlines(ils->content.linkable.label, indent + 2); + break; + case INL_EMPH: + printf("emph\n"); + print_inlines(ils->content.linkable.label, indent + 2); + break; + } + ils = ils->next; + } +} + // Functions to pretty-print inline and node_block lists, for debugging. // Prettyprint an inline list, for debugging. -extern void print_blocks(node_block* b, int indent) +static void print_blocks(node_block* b, int indent) { struct ListData *data; while(b != NULL) { - // printf("%3d %3d %3d| ", b->start_line, b->start_column, b->end_line); for (int i=0; i < indent; i++) { putchar(' '); } @@ -115,58 +170,7 @@ extern void print_blocks(node_block* b, int indent) } } -// Prettyprint an inline list, for debugging. -extern void print_inlines(node_inl* ils, int indent) +void stmd_debug_print(node_block *root) { - while(ils != NULL) { - for (int i=0; i < indent; i++) { - putchar(' '); - } - switch(ils->tag) { - case INL_STRING: - printf("str "); - print_str(ils->content.literal.data, ils->content.literal.len); - putchar('\n'); - break; - case INL_LINEBREAK: - printf("linebreak\n"); - break; - case INL_SOFTBREAK: - printf("softbreak\n"); - break; - case INL_CODE: - printf("code "); - print_str(ils->content.literal.data, ils->content.literal.len); - putchar('\n'); - break; - case INL_RAW_HTML: - printf("html "); - print_str(ils->content.literal.data, ils->content.literal.len); - putchar('\n'); - break; - case INL_LINK: - case INL_IMAGE: - printf("%s url=", ils->tag == INL_LINK ? "link" : "image"); - - if (ils->content.linkable.url) - print_str(ils->content.linkable.url, -1); - - if (ils->content.linkable.title) { - printf(" title="); - print_str(ils->content.linkable.title, -1); - } - putchar('\n'); - print_inlines(ils->content.linkable.label, indent + 2); - break; - case INL_STRONG: - printf("strong\n"); - print_inlines(ils->content.linkable.label, indent + 2); - break; - case INL_EMPH: - printf("emph\n"); - print_inlines(ils->content.linkable.label, indent + 2); - break; - } - ils = ils->next; - } + print_blocks(root, 0); } diff --git a/src/references.c b/src/references.c index 300bbcc..3e54b48 100644 --- a/src/references.c +++ b/src/references.c @@ -1,6 +1,7 @@ #include "stmd.h" #include "utf8.h" #include "references.h" +#include "inlines.h" static unsigned int refhash(const unsigned char *link_ref) diff --git a/src/stmd.h b/src/stmd.h index 4e21e6c..c6473a6 100644 --- a/src/stmd.h +++ b/src/stmd.h @@ -104,28 +104,12 @@ struct node_block { typedef struct node_block node_block; -node_inl* parse_inlines(strbuf *input, reference_map *refmap); -void free_inlines(node_inl* e); +node_block *stmd_parse_document(const unsigned char *buffer, size_t len); +node_block *stmd_parse_file(FILE *f); -int parse_reference_inline(strbuf *input, reference_map *refmap); -void unescape_buffer(strbuf *buf); +void stmd_free_nodes(node_block *e); -extern node_block* make_document(); -extern node_block* add_child(node_block* parent, - int block_type, int start_line, int start_column); -void free_blocks(node_block* e); - -extern node_block *stmd_parse_document(const unsigned char *buffer, size_t len); -extern node_block *stmd_parse_file(FILE *f); - -void print_inlines(node_inl* ils, int indent); -void print_blocks(node_block* blk, int indent); - -void blocks_to_html(strbuf *html, node_block *b, bool tight); -void inlines_to_html(strbuf *html, node_inl *b); - -unsigned char *clean_url(chunk *url); -unsigned char *clean_autolink(chunk *url, int is_email); -unsigned char *clean_title(chunk *title); +void stmd_debug_print(node_block *root); +void stmd_render_html(strbuf *html, node_block *root); #endif -- cgit v1.2.3