From e216094e2192c05ddbd0988458eb8c0012e7baf8 Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Tue, 2 Sep 2014 01:10:54 +0200 Subject: lol --- Makefile | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'Makefile') diff --git a/Makefile b/Makefile index 55b6645..cb5938d 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ -CFLAGS=-g -O3 -Wall -Wextra -std=c99 -Isrc $(OPTFLAGS) -LDFLAGS=-g -O3 -Wall -Werror +CFLAGS=-ggdb3 -O0 -Wall -Wextra -std=c99 -Isrc $(OPTFLAGS) +LDFLAGS=-ggdb3 -O0 -Wall -Werror SRCDIR=src DATADIR=data @@ -41,13 +41,13 @@ testjs: spec.txt benchjs: node js/bench.js ${BENCHINP} -$(PROG): $(SRCDIR)/main.c $(SRCDIR)/inlines.o $(SRCDIR)/blocks.o $(SRCDIR)/detab.o $(SRCDIR)/bstrlib.o $(SRCDIR)/scanners.o $(SRCDIR)/print.o $(SRCDIR)/html.o $(SRCDIR)/utf8.o +$(PROG): $(SRCDIR)/main.c $(SRCDIR)/inlines.o $(SRCDIR)/buffer.o $(SRCDIR)/blocks.o $(SRCDIR)/scanners.c $(SRCDIR)/print.o $(SRCDIR)/html.o $(SRCDIR)/utf8.o $(CC) $(LDFLAGS) -o $@ $^ $(SRCDIR)/scanners.c: $(SRCDIR)/scanners.re re2c --case-insensitive -bis $< > $@ || (rm $@ && false) -$(SRCDIR)/case_fold_switch.c: $(DATADIR)/CaseFolding-3.2.0.txt +$(SRCDIR)/case_fold_switch.inc $(DATADIR)/CaseFolding-3.2.0.txt perl mkcasefold.pl < $< > $@ .PHONY: leakcheck clean fuzztest dingus upload @@ -58,6 +58,9 @@ dingus: leakcheck: $(PROG) cat oldtests/*/*.markdown | valgrind --leak-check=full --dsymutil=yes $(PROG) +operf: $(PROG) + operf $(PROG) /dev/null + fuzztest: for i in `seq 1 10`; do \ time cat /dev/urandom | head -c 100000 | iconv -f latin1 -t utf-8 | $(PROG) >/dev/null; done @@ -69,7 +72,7 @@ update-site: spec.html narrative.html (cd _site ; git pull ; git commit -a -m "Updated site for latest spec, narrative, js" ; git push; cd ..) 
clean: - -rm test $(SRCDIR)/*.o $(SRCDIR)/scanners.c - -rm -r *.dSYM - -rm README.html - -rm spec.md fuzz.txt spec.html + -rm -f test $(SRCDIR)/*.o $(SRCDIR)/scanners.c + -rm -rf *.dSYM + -rm -f README.html + -rm -f spec.md fuzz.txt spec.html -- cgit v1.2.3 From 582674e662d1f8757350c51486a5e0a837195e15 Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Tue, 2 Sep 2014 13:18:04 +0200 Subject: ffffix --- Makefile | 11 ++- src/blocks.c | 58 +++++++---- src/buffer.c | 69 +++++-------- src/buffer.h | 19 ++-- src/html.c | 276 ---------------------------------------------------- src/inlines.c | 4 +- src/main.c | 142 ++++++++++++--------------- src/print.c | 307 ++++++++++++++++++++++++++++++---------------------------- src/stmd.h | 13 +-- src/utf8.c | 6 +- src/utf8.h | 6 -- 11 files changed, 304 insertions(+), 607 deletions(-) delete mode 100644 src/html.c delete mode 100644 src/utf8.h (limited to 'Makefile') diff --git a/Makefile b/Makefile index cb5938d..d14a928 100644 --- a/Makefile +++ b/Makefile @@ -6,7 +6,7 @@ DATADIR=data PROG=./stmd .PHONY: all oldtests test spec benchjs testjs -all: $(SRCDIR)/case_fold_switch.c $(PROG) +all: $(SRCDIR)/case_fold_switch.inc $(PROG) README.html: README.md template.html pandoc --template template.html -S -s -t html5 -o $@ $< @@ -41,13 +41,16 @@ testjs: spec.txt benchjs: node js/bench.js ${BENCHINP} -$(PROG): $(SRCDIR)/main.c $(SRCDIR)/inlines.o $(SRCDIR)/buffer.o $(SRCDIR)/blocks.o $(SRCDIR)/scanners.c $(SRCDIR)/print.o $(SRCDIR)/html.o $(SRCDIR)/utf8.o +HTML_OBJ=$(SRCDIR)/html/html.o $(SRCDIR)/html/houdini_href_e.o $(SRCDIR)/html/houdini_html_e.o +STMD_OBJ=$(SRCDIR)/inlines.o $(SRCDIR)/buffer.o $(SRCDIR)/blocks.o $(SRCDIR)/scanners.c $(SRCDIR)/print.o $(SRCDIR)/utf8.o + +$(PROG): $(SRCDIR)/main.c $(HTML_OBJ) $(STMD_OBJ) $(CC) $(LDFLAGS) -o $@ $^ $(SRCDIR)/scanners.c: $(SRCDIR)/scanners.re re2c --case-insensitive -bis $< > $@ || (rm $@ && false) -$(SRCDIR)/case_fold_switch.inc $(DATADIR)/CaseFolding-3.2.0.txt 
+$(SRCDIR)/case_fold_switch.inc: $(DATADIR)/CaseFolding-3.2.0.txt perl mkcasefold.pl < $< > $@ .PHONY: leakcheck clean fuzztest dingus upload @@ -72,7 +75,7 @@ update-site: spec.html narrative.html (cd _site ; git pull ; git commit -a -m "Updated site for latest spec, narrative, js" ; git push; cd ..) clean: - -rm -f test $(SRCDIR)/*.o $(SRCDIR)/scanners.c + -rm -f test $(SRCDIR)/*.o $(SRCDIR)/scanners.c $(SRCDIR)/html/*.o -rm -rf *.dSYM -rm -f README.html -rm -f spec.md fuzz.txt spec.html diff --git a/src/blocks.c b/src/blocks.c index eabac03..71dc830 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -3,11 +3,12 @@ #include #include #include -#include "bstrlib.h" + #include "stmd.h" -#include "uthash.h" -#include "debug.h" #include "scanners.h" +#include "uthash.h" + +static void finalize(block* b, int line_number); static block* make_block(int tag, int start_line, int start_column) { @@ -140,7 +141,7 @@ static int break_out_of_lists(block ** bptr, int line_number) } -extern void finalize(block* b, int line_number) +static void finalize(block* b, int line_number) { int firstlinelen; int pos; @@ -364,7 +365,7 @@ static int lists_match(struct ListData list_data, list_data.bullet_char == item_data.bullet_char); } -static void expand_tabs(gh_buf *ob, const char *line, size_t size) +static void expand_tabs(gh_buf *ob, const unsigned char *line, size_t size) { size_t i = 0, tab = 0; @@ -389,13 +390,43 @@ static void expand_tabs(gh_buf *ob, const char *line, size_t size) } } -extern block *stmd_parse_document(const char *buffer, size_t len) +static block *finalize_parsing(block *document, int linenum) { - gh_buf line = GH_BUF_INIT; + while (document != document->top) { + finalize(document, linenum); + document = document->parent; + } + + finalize(document, linenum); + process_inlines(document, document->attributes.refmap); + + return document; +} +extern block *stmd_parse_file(FILE *f) +{ + gh_buf line = GH_BUF_INIT; + unsigned char buffer[4096]; + int linenum = 1; block 
*document = make_document(); + + while (fgets((char *)buffer, sizeof(buffer), f)) { + expand_tabs(&line, buffer, strlen(buffer)); + incorporate_line(&line, linenum, &document); + gh_buf_clear(&line); + linenum++; + } + + gh_buf_free(&line); + return finalize_document(document, linenum); +} + +extern block *stmd_parse_document(const unsigned char *buffer, size_t len) +{ + gh_buf line = GH_BUF_INIT; int linenum = 1; - const char *end = buffer + len; + const unsigned char *end = buffer + len; + block *document = make_document(); while (buffer < end) { const char *eol = memchr(buffer, '\n', end - buffer); @@ -414,16 +445,7 @@ extern block *stmd_parse_document(const char *buffer, size_t len) } gh_buf_free(&line); - - while (document != document->top) { - finalize(document, linenum); - document = document->parent; - } - - finalize(document, linenum); - process_inlines(document, document->attributes.refmap); - - return document; + return finalize_document(document, linenum); } // Process one line at a time, modifying a block. 
diff --git a/src/buffer.c b/src/buffer.c index b81e7fa..17dc864 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -31,10 +31,10 @@ void gh_buf_init(gh_buf *buf, int initial_size) int gh_buf_try_grow(gh_buf *buf, int target_size, bool mark_oom) { - char *new_ptr; - size_t new_size; + unsigned char *new_ptr; + int new_size; - if (buf->ptr == gh_buf__oom || buf->asize < 0) + if (buf->ptr == gh_buf__oom) return -1; if (target_size <= buf->asize) @@ -79,7 +79,7 @@ void gh_buf_free(gh_buf *buf) { if (!buf) return; - if (buf->asize > 0 && buf->ptr != gh_buf__initbuf && buf->ptr != gh_buf__oom) + if (buf->ptr != gh_buf__initbuf && buf->ptr != gh_buf__oom) free(buf->ptr); gh_buf_init(buf, 0); @@ -91,14 +91,9 @@ void gh_buf_clear(gh_buf *buf) if (buf->asize > 0) buf->ptr[0] = '\0'; - - if (buf->asize < 0) { - buf->ptr = gh_buf__initbuf; - buf->asize = 0; - } } -int gh_buf_set(gh_buf *buf, const char *data, int len) +int gh_buf_set(gh_buf *buf, const unsigned char *data, int len) { if (len == 0 || data == NULL) { gh_buf_clear(buf); @@ -115,10 +110,12 @@ int gh_buf_set(gh_buf *buf, const char *data, int len) int gh_buf_sets(gh_buf *buf, const char *string) { - return gh_buf_set(buf, string, string ? strlen(string) : 0); + return gh_buf_set(buf, + (const unsigned char *)string, + string ? 
strlen(string) : 0); } -int gh_buf_putc(gh_buf *buf, char c) +int gh_buf_putc(gh_buf *buf, int c) { ENSURE_SIZE(buf, buf->size + 2); buf->ptr[buf->size++] = c; @@ -126,7 +123,7 @@ int gh_buf_putc(gh_buf *buf, char c) return 0; } -int gh_buf_put(gh_buf *buf, const char *data, int len) +int gh_buf_put(gh_buf *buf, const unsigned char *data, int len) { ENSURE_SIZE(buf, buf->size + len + 1); memmove(buf->ptr + buf->size, data, len); @@ -137,8 +134,7 @@ int gh_buf_put(gh_buf *buf, const char *data, int len) int gh_buf_puts(gh_buf *buf, const char *string) { - assert(string); - return gh_buf_put(buf, string, strlen(string)); + return gh_buf_put(buf, (const unsigned char *)string, strlen(string)); } int gh_buf_vprintf(gh_buf *buf, const char *format, va_list ap) @@ -153,7 +149,7 @@ int gh_buf_vprintf(gh_buf *buf, const char *format, va_list ap) va_copy(args, ap); len = vsnprintf( - buf->ptr + buf->size, + (char *)buf->ptr + buf->size, buf->asize - buf->size, format, args ); @@ -187,9 +183,9 @@ int gh_buf_printf(gh_buf *buf, const char *format, ...) return r; } -void gh_buf_copy_cstr(char *data, size_t datasize, const gh_buf *buf) +void gh_buf_copy_cstr(char *data, int datasize, const gh_buf *buf) { - size_t copylen; + int copylen; assert(data && datasize && buf); @@ -212,9 +208,9 @@ void gh_buf_swap(gh_buf *buf_a, gh_buf *buf_b) *buf_b = t; } -char *gh_buf_detach(gh_buf *buf) +unsigned char *gh_buf_detach(gh_buf *buf) { - char *data = buf->ptr; + unsigned char *data = buf->ptr; if (buf->asize == 0 || buf->ptr == gh_buf__oom) return NULL; @@ -224,13 +220,13 @@ char *gh_buf_detach(gh_buf *buf) return data; } -void gh_buf_attach(gh_buf *buf, char *ptr, int asize) +void gh_buf_attach(gh_buf *buf, unsigned char *ptr, int asize) { gh_buf_free(buf); if (ptr) { buf->ptr = ptr; - buf->size = strlen(ptr); + buf->size = strlen((char *)ptr); if (asize) buf->asize = (asize < buf->size) ? 
buf->size + 1 : asize; else /* pass 0 to fall back on strlen + 1 */ @@ -249,11 +245,11 @@ int gh_buf_cmp(const gh_buf *a, const gh_buf *b) int gh_buf_strchr(const gh_buf *buf, int c, int pos) { - const char *p = memchr(buf->ptr + pos, c, buf->size - pos); - if (!p) - return -1; + const char *p = memchr(buf->ptr + pos, c, buf->size - pos); + if (!p) + return -1; - return (int)(p - p->ptr); + return (int)(p - buf->ptr); } int gh_buf_strrchr(const gh_buf *buf, int c, int pos) @@ -270,36 +266,21 @@ int gh_buf_strrchr(const gh_buf *buf, int c, int pos) void gh_buf_truncate(gh_buf *buf, size_t len) { - assert(buf->asize >= 0); - if (len < buf->size) { buf->size = len; buf->ptr[buf->size] = '\0'; } } -void gh_buf_ltruncate(gh_buf *buf, size_t len) -{ - assert(buf->asize >= 0); - - if (len && len < buf->size) { - memmove(buf->ptr, buf->ptr + len, buf->size - len); - buf->size -= len; - buf->ptr[buf->size] = '\0'; - } -} - void gh_buf_trim(gh_buf *buf) { - size_t i = 0; - - assert(buf->asize >= 0); - - /* ltrim */ + /* TODO: leading whitespace? */ + /* while (i < buf->size && isspace(buf->ptr[i])) i++; gh_buf_truncate(buf, i); + */ /* rtrim */ while (buf->size > 0) { diff --git a/src/buffer.h b/src/buffer.h index 2581ee3..422ef02 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -24,13 +24,6 @@ extern unsigned char gh_buf__oom[]; */ extern void gh_buf_init(gh_buf *buf, int initial_size); -static inline void gh_buf_static(gh_buf *buf, unsigned char *source) -{ - buf->ptr = source; - buf->size = strlen(source); - buf->asize = -1; -} - /** * Attempt to grow the buffer to hold at least `target_size` bytes. 
* @@ -81,13 +74,13 @@ static inline size_t gh_buf_len(const gh_buf *buf) extern int gh_buf_cmp(const gh_buf *a, const gh_buf *b); -extern void gh_buf_attach(gh_buf *buf, char *ptr, int asize); -extern char *gh_buf_detach(gh_buf *buf); +extern void gh_buf_attach(gh_buf *buf, unsigned char *ptr, int asize); +extern unsigned char *gh_buf_detach(gh_buf *buf); extern void gh_buf_copy_cstr(char *data, int datasize, const gh_buf *buf); static inline const char *gh_buf_cstr(const gh_buf *buf) { - return buf->ptr; + return (char *)buf->ptr; } #define gh_buf_at(buf, n) ((buf)->ptr[n]) @@ -100,10 +93,10 @@ static inline const char *gh_buf_cstr(const gh_buf *buf) * return code of these functions and call them in a series then just call * gh_buf_oom at the end. */ -extern int gh_buf_set(gh_buf *buf, const char *data, int len); +extern int gh_buf_set(gh_buf *buf, const unsigned char *data, int len); extern int gh_buf_sets(gh_buf *buf, const char *string); -extern int gh_buf_putc(gh_buf *buf, char c); -extern int gh_buf_put(gh_buf *buf, const char *data, int len); +extern int gh_buf_putc(gh_buf *buf, int c); +extern int gh_buf_put(gh_buf *buf, const unsigned char *data, int len); extern int gh_buf_puts(gh_buf *buf, const char *string); extern int gh_buf_printf(gh_buf *buf, const char *format, ...) __attribute__((format (printf, 2, 3))); diff --git a/src/html.c b/src/html.c deleted file mode 100644 index aeec5f1..0000000 --- a/src/html.c +++ /dev/null @@ -1,276 +0,0 @@ -#include -#include -#include -#include "bstrlib.h" -#include "stmd.h" -#include "debug.h" -#include "scanners.h" - -// Functions to convert block and inline lists to HTML strings. - -// Escape special characters in HTML. More efficient than -// three calls to bfindreplace. If preserve_entities is set, -// existing entities are left alone. 
-static bstring escape_html(bstring inp, bool preserve_entities) -{ - int pos = 0; - int match; - char c; - bstring escapable = blk2bstr("&<>\"", 4); - bstring ent; - bstring s = bstrcpy(inp); - while ((pos = binchr(s, pos, escapable)) != BSTR_ERR) { - c = bchar(s,pos); - switch (c) { - case '<': - bdelete(s, pos, 1); - ent = blk2bstr("<", 4); - binsert(s, pos, ent, ' '); - bdestroy(ent); - pos += 4; - break; - case '>': - bdelete(s, pos, 1); - ent = blk2bstr(">", 4); - binsert(s, pos, ent, ' '); - bdestroy(ent); - pos += 4; - break; - case '&': - if (preserve_entities && (match = scan_entity(s, pos))) { - pos += match; - } else { - bdelete(s, pos, 1); - ent = blk2bstr("&", 5); - binsert(s, pos, ent, ' '); - bdestroy(ent); - pos += 5; - } - break; - case '"': - bdelete(s, pos, 1); - ent = blk2bstr(""", 6); - binsert(s, pos, ent, ' '); - bdestroy(ent); - pos += 6; - break; - default: - bdelete(s, pos, 1); - log_err("unexpected character %02x", c); - } - } - bdestroy(escapable); - return s; -} - -static inline void cr(bstring buffer) -{ - int c = bchar(buffer, blength(buffer) - 1); - if (c != '\n' && c) { - bconchar(buffer, '\n'); - } -} - -// Convert a block list to HTML. Returns 0 on success, and sets result. -extern int blocks_to_html(block* b, bstring* result, bool tight) -{ - bstring contents = NULL; - bstring escaped, escaped2; - struct bstrList * info_words; - struct ListData * data; - bstring mbstart; - bstring html = blk2bstr("", 0); - - while(b != NULL) { - switch(b->tag) { - case document: - check(blocks_to_html(b->children, &contents, false) == 0, - "error converting blocks to html"); - bformata(html, "%s", contents->data); - bdestroy(contents); - break; - case paragraph: - check(inlines_to_html(b->inline_content, &contents) == 0, - "error converting inlines to html"); - if (tight) { - bformata(html, "%s", contents->data); - } else { - cr(html); - bformata(html, "

%s

", contents->data); - cr(html); - } - bdestroy(contents); - break; - case block_quote: - check(blocks_to_html(b->children, &contents, false) == 0, - "error converting blocks to html"); - cr(html); - bformata(html, "
\n%s
", contents->data); - cr(html); - bdestroy(contents); - break; - case list_item: - check(blocks_to_html(b->children, &contents, tight) == 0, - "error converting blocks to html"); - brtrimws(contents); - cr(html); - bformata(html, "
  • %s
  • ", contents->data); - cr(html); - bdestroy(contents); - break; - case list: - // make sure a list starts at the beginning of the line: - cr(html); - data = &(b->attributes.list_data); - check(blocks_to_html(b->children, &contents, data->tight) == 0, - "error converting blocks to html"); - mbstart = bformat(" start=\"%d\"", data->start); - bformata(html, "<%s%s>\n%s", - data->list_type == bullet ? "ul" : "ol", - data->start == 1 ? "" : (char*) mbstart->data, - contents->data, - data->list_type == bullet ? "ul" : "ol"); - cr(html); - bdestroy(contents); - bdestroy(mbstart); - break; - case atx_header: - case setext_header: - check(inlines_to_html(b->inline_content, &contents) == 0, - "error converting inlines to html"); - cr(html); - bformata(html, "%s", - b->attributes.header_level, - contents->data, - b->attributes.header_level); - cr(html); - bdestroy(contents); - break; - case indented_code: - escaped = escape_html(b->string_content, false); - cr(html); - bformata(html, "
    %s
    ", escaped->data); - cr(html); - bdestroy(escaped); - break; - case fenced_code: - escaped = escape_html(b->string_content, false); - cr(html); - bformata(html, "
    attributes.fenced_code_data.info) > 0) {
    -        escaped2 = escape_html(b->attributes.fenced_code_data.info, true);
    -        info_words = bsplit(escaped2, ' ');
    -        bformata(html, " class=\"language-%s\"", info_words->entry[0]->data);
    -        bdestroy(escaped2);
    -        bstrListDestroy(info_words);
    -      }
    -      bformata(html, ">%s
    ", escaped->data); - cr(html); - bdestroy(escaped); - break; - case html_block: - bformata(html, "%s", b->string_content->data); - break; - case hrule: - bformata(html, "
    "); - cr(html); - break; - case reference_def: - break; - default: - log_warn("block type %d not implemented\n", b->tag); - break; - } - b = b->next; - } - *result = html; - return 0; - error: - return -1; -} - -// Convert an inline list to HTML. Returns 0 on success, and sets result. -extern int inlines_to_html(inl* ils, bstring* result) -{ - bstring contents = NULL; - bstring html = blk2bstr("", 0); - bstring mbtitle, escaped, escaped2; - - while(ils != NULL) { - switch(ils->tag) { - case str: - escaped = escape_html(ils->content.literal, false); - bformata(html, "%s", escaped->data); - bdestroy(escaped); - break; - case linebreak: - bformata(html, "
    \n"); - break; - case softbreak: - bformata(html, "\n"); - break; - case code: - escaped = escape_html(ils->content.literal, false); - bformata(html, "%s", escaped->data); - bdestroy(escaped); - break; - case raw_html: - case entity: - bformata(html, "%s", ils->content.literal->data); - break; - case link: - check(inlines_to_html(ils->content.inlines, &contents) == 0, - "error converting inlines to html"); - if (blength(ils->content.linkable.title) > 0) { - escaped = escape_html(ils->content.linkable.title, true); - mbtitle = bformat(" title=\"%s\"", escaped->data); - bdestroy(escaped); - } else { - mbtitle = blk2bstr("",0); - } - escaped = escape_html(ils->content.linkable.url, true); - bformata(html, "%s", - escaped->data, - mbtitle->data, - contents->data); - bdestroy(escaped); - bdestroy(mbtitle); - bdestroy(contents); - break; - case image: - check(inlines_to_html(ils->content.inlines, &contents) == 0, - "error converting inlines to html"); - escaped = escape_html(ils->content.linkable.url, true); - escaped2 = escape_html(contents, false); - bdestroy(contents); - bformata(html, "\"%s\"",data, escaped2->data); - bdestroy(escaped); - bdestroy(escaped2); - if (blength(ils->content.linkable.title) > 0) { - escaped = escape_html(ils->content.linkable.title, true); - bformata(html, " title=\"%s\"", escaped->data); - bdestroy(escaped); - } - bformata(html, " />"); - break; - case strong: - check(inlines_to_html(ils->content.inlines, &contents) == 0, - "error converting inlines to html"); - bformata(html, "%s", contents->data); - bdestroy(contents); - break; - case emph: - check(inlines_to_html(ils->content.inlines, &contents) == 0, - "error converting inlines to html"); - bformata(html, "%s", contents->data); - bdestroy(contents); - break; - } - ils = ils->next; - } - *result = html; - return 0; - error: - return -1; -} diff --git a/src/inlines.c b/src/inlines.c index 4ff45ad..82c7219 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -6,9 +6,7 @@ #include 
"stmd.h" #include "uthash.h" -#include "debug.h" #include "scanners.h" -#include "utf8.h" typedef struct Subject { const gh_buf *buffer; @@ -119,7 +117,7 @@ inline static inl* make_linkable(int t, inl* label, chunk url, chunk title) e->tag = t; e->content.linkable.label = label; e->content.linkable.url = chunk_to_cstr(&url); - e->content.linkable.title = chunk_to_cstr(&title); + e->content.linkable.title = url.len ? chunk_to_cstr(&title) : NULL; e->next = NULL; return e; } diff --git a/src/main.c b/src/main.c index 9e0a3c8..e1abedc 100644 --- a/src/main.c +++ b/src/main.c @@ -1,99 +1,77 @@ #include #include -#include "bstrlib.h" +#include #include "stmd.h" #include "debug.h" void print_usage() { - printf("Usage: stmd [FILE*]\n"); - printf("Options: --help, -h Print usage information\n"); - printf(" --ast Print AST instead of HTML\n"); - printf(" --version Print version\n"); + printf("Usage: stmd [FILE*]\n"); + printf("Options: --help, -h Print usage information\n"); + printf(" --ast Print AST instead of HTML\n"); + printf(" --version Print version\n"); } -int main(int argc, char *argv[]) { - int i; - bool ast = false; - int g = 0; - int numfps = 0; - int files[argc]; +static void print_document(block *document, bool ast) +{ + gh_buf html = GH_BUF_INIT; + + if (ast) { + print_blocks(document, 0); + } else { + blocks_to_html(&html, document, false); + printf("%s", html.ptr); + gh_buf_free(&html); + } +} - for (i=1; i < argc; i++) { - if (strcmp(argv[i], "--version") == 0) { - printf("stmd %s", VERSION); - printf(" - CommonMark converter (c) 2014 John MacFarlane\n"); - exit(0); - } else if ((strcmp(argv[i], "--help") == 0) || - (strcmp(argv[i], "-h") == 0)) { - print_usage(); - exit(0); - } else if (strcmp(argv[i], "--ast") == 0) { - ast = true; - } else if (*argv[i] == '-') { - print_usage(); - exit(1); - } else { // treat as file argument - files[g] = i; - g++; - } - } +int main(int argc, char *argv[]) +{ + int i, numfps = 0; + bool ast = false; + int files[argc]; + 
block *document = NULL; - numfps = g; - bstring s = NULL; - bstring html; - g = 0; - block * cur = make_document(); - int linenum = 1; - extern int errno; - FILE * fp = NULL; + for (i = 1; i < argc; i++) { + if (strcmp(argv[i], "--version") == 0) { + printf("stmd %s", VERSION); + printf(" - CommonMark converter (c) 2014 John MacFarlane\n"); + exit(0); + } else if ((strcmp(argv[i], "--help") == 0) || + (strcmp(argv[i], "-h") == 0)) { + print_usage(); + exit(0); + } else if (strcmp(argv[i], "--ast") == 0) { + ast = true; + } else if (*argv[i] == '-') { + print_usage(); + exit(1); + } else { // treat as file argument + files[numfps++] = i; + } + } - if (numfps == 0) { - // read from stdin - while ((s = bgets((bNgetc) fgetc, stdin, '\n'))) { - check(incorporate_line(s, linenum, &cur) == 0, - "error incorporating line %d", linenum); - bdestroy(s); - linenum++; - } - } else { - // iterate over input file pointers - for (g=0; g < numfps; g++) { + if (numfps == 0) { + document = stmd_parse_file(stdin); + print_document(document, ast); + free_blocks(document); + } else { + for (i = 0; i < numfps; i++) { + FILE *fp = fopen(argv[files[i]], "r"); - fp = fopen(argv[files[g]], "r"); - if (fp == NULL) { - fprintf(stderr, "Error opening file %s: %s\n", - argv[files[g]], strerror(errno)); - exit(1); - } + if (fp == NULL) { + fprintf(stderr, "Error opening file %s: %s\n", + argv[files[i]], strerror(errno)); + exit(1); + } - while ((s = bgets((bNgetc) fgetc, fp, '\n'))) { - check(incorporate_line(s, linenum, &cur) == 0, - "error incorporating line %d", linenum); - bdestroy(s); - linenum++; - } - fclose(fp); - } - } + document = stmd_parse_file(fp); + print_document(document, ast); + free_blocks(document); + fclose(fp); + } + } - while (cur != cur->top) { - finalize(cur, linenum); - cur = cur->parent; - } - check(cur == cur->top, "problems finalizing open containers"); - finalize(cur, linenum); - process_inlines(cur, cur->attributes.refmap); - if (ast) { - print_blocks(cur, 0); - } 
else { - check(blocks_to_html(cur, &html, false) == 0, "could not format as HTML"); - // printf("%s", html->data); - bdestroy(html); - } - free_blocks(cur); - return 0; -error: - return -1; + return 0; } diff --git a/src/print.c b/src/print.c index a924870..3ebde16 100644 --- a/src/print.c +++ b/src/print.c @@ -1,168 +1,175 @@ #include #include -#include "bstrlib.h" +#include #include "stmd.h" #include "debug.h" -static bstring format_str(bstring s) +static void print_str(const unsigned char *s, int len) { - int pos = 0; - int len = blength(s); - bstring result = bfromcstr(""); - char c; - bformata(result, "\""); - while (pos < len) { - c = bchar(s, pos); - switch (c) { - case '\n': - bformata(result, "\\n"); - break; - case '"': - bformata(result, "\\\""); - break; - case '\\': - bformata(result, "\\\\"); - break; - default: - bformata(result, "%c", c); - } - pos++; - } - bformata(result, "\""); - return result; + int i; + + if (len < 0) + len = strlen(s); + + putchar('"'); + for (i = 0; i < len; ++i) { + unsigned char c = s[i]; + + switch (c) { + case '\n': + printf("\\n"); + break; + case '"': + printf("\\\""); + break; + case '\\': + printf("\\\\"); + break; + default: + putchar((int)c); + } + } + putchar('"'); } // Functions to pretty-print inline and block lists, for debugging. // Prettyprint an inline list, for debugging. 
extern void print_blocks(block* b, int indent) { - struct ListData * data; - while(b != NULL) { - // printf("%3d %3d %3d| ", b->start_line, b->start_column, b->end_line); - for (int i=0; i < indent; i++) { - putchar(' '); - } - switch(b->tag) { - case document: - printf("document\n"); - print_blocks(b->children, indent + 2); - break; - case block_quote: - printf("block_quote\n"); - print_blocks(b->children, indent + 2); - break; - case list_item: - data = &(b->attributes.list_data); - printf("list_item\n"); - print_blocks(b->children, indent + 2); - break; - case list: - data = &(b->attributes.list_data); - if (data->list_type == ordered) { - printf("list (type=ordered tight=%s start=%d delim=%s)\n", - (data->tight ? "true" : "false"), - data->start, - (data->delimiter == parens ? "parens" : "period")); - } else { - printf("list (type=bullet tight=%s bullet_char=%c)\n", - (data->tight ? "true" : "false"), - data->bullet_char); - } - print_blocks(b->children, indent + 2); - break; - case atx_header: - printf("atx_header (level=%d)\n", b->attributes.header_level); - print_inlines(b->inline_content, indent + 2); - break; - case setext_header: - printf("setext_header (level=%d)\n", b->attributes.header_level); - print_inlines(b->inline_content, indent + 2); - break; - case paragraph: - printf("paragraph\n"); - print_inlines(b->inline_content, indent + 2); - break; - case hrule: - printf("hrule\n"); - break; - case indented_code: - printf("indented_code %s\n", format_str(b->string_content)->data); - break; - case fenced_code: - printf("fenced_code length=%d info=%s %s\n", - b->attributes.fenced_code_data.fence_length, - format_str(b->attributes.fenced_code_data.info)->data, - format_str(b->string_content)->data); - break; - case html_block: - printf("html_block %s\n", format_str(b->string_content)->data); - break; - case reference_def: - printf("reference_def\n"); - break; - default: - log_warn("block type %d not implemented\n", b->tag); - break; - } - b = b->next; - } 
+ struct ListData *data; + + while(b != NULL) { + // printf("%3d %3d %3d| ", b->start_line, b->start_column, b->end_line); + for (int i=0; i < indent; i++) { + putchar(' '); + } + + switch(b->tag) { + case document: + printf("document\n"); + print_blocks(b->children, indent + 2); + break; + case block_quote: + printf("block_quote\n"); + print_blocks(b->children, indent + 2); + break; + case list_item: + data = &(b->attributes.list_data); + printf("list_item\n"); + print_blocks(b->children, indent + 2); + break; + case list: + data = &(b->attributes.list_data); + if (data->list_type == ordered) { + printf("list (type=ordered tight=%s start=%d delim=%s)\n", + (data->tight ? "true" : "false"), + data->start, + (data->delimiter == parens ? "parens" : "period")); + } else { + printf("list (type=bullet tight=%s bullet_char=%c)\n", + (data->tight ? "true" : "false"), + data->bullet_char); + } + print_blocks(b->children, indent + 2); + break; + case atx_header: + printf("atx_header (level=%d)\n", b->attributes.header_level); + print_inlines(b->inline_content, indent + 2); + break; + case setext_header: + printf("setext_header (level=%d)\n", b->attributes.header_level); + print_inlines(b->inline_content, indent + 2); + break; + case paragraph: + printf("paragraph\n"); + print_inlines(b->inline_content, indent + 2); + break; + case hrule: + printf("hrule\n"); + break; + case indented_code: + printf("indented_code "); + print_str(b->string_content.ptr, -1); + putchar('\n'); + break; + case fenced_code: + printf("fenced_code length=%d info=", + b->attributes.fenced_code_data.fence_length); + print_str(b->attributes.fenced_code_data.info.ptr, -1); + putchar(' '); + print_str(b->string_content.ptr, -1); + putchar('\n'); + break; + case html_block: + printf("html_block "); + print_str(b->string_content.ptr, -1); + putchar('\n'); + break; + case reference_def: + printf("reference_def\n"); + break; + default: + printf("# NOT IMPLEMENTED (%d)\n", b->tag); + break; + } + b = b->next; 
+ } } // Prettyprint an inline list, for debugging. extern void print_inlines(inl* ils, int indent) { - while(ils != NULL) { - /* - // we add 11 extra spaces for the line/column info - for (int i=0; i < 11; i++) { - putchar(' '); - } - putchar('|'); - putchar(' '); - */ - for (int i=0; i < indent; i++) { - putchar(' '); - } - switch(ils->tag) { - case str: - printf("str %s\n", format_str(ils->content.literal)->data); - break; - case linebreak: - printf("linebreak\n"); - break; - case softbreak: - printf("softbreak\n"); - break; - case code: - printf("code %s\n", format_str(ils->content.literal)->data); - break; - case raw_html: - printf("html %s\n", format_str(ils->content.literal)->data); - break; - case entity: - printf("entity %s\n", format_str(ils->content.literal)->data); - break; - case link: - printf("link url=%s title=%s\n", - format_str(ils->content.linkable.url)->data, - format_str(ils->content.linkable.title)->data); - print_inlines(ils->content.linkable.label, indent + 2); - break; - case image: - printf("image url=%s title=%s\n", - format_str(ils->content.linkable.url)->data, - format_str(ils->content.linkable.title)->data); - print_inlines(ils->content.linkable.label, indent + 2); - break; - case strong: - printf("strong\n"); - print_inlines(ils->content.linkable.label, indent + 2); - break; - case emph: - printf("emph\n"); - print_inlines(ils->content.linkable.label, indent + 2); - break; - } - ils = ils->next; - } + while(ils != NULL) { + for (int i=0; i < indent; i++) { + putchar(' '); + } + switch(ils->tag) { + case str: + printf("str "); + print_str(ils->content.literal.data, ils->content.literal.len); + putchar('\n'); + break; + case linebreak: + printf("linebreak\n"); + break; + case softbreak: + printf("softbreak\n"); + break; + case code: + printf("code "); + print_str(ils->content.literal.data, ils->content.literal.len); + putchar('\n'); + break; + case raw_html: + printf("html "); + print_str(ils->content.literal.data, 
ils->content.literal.len); + putchar('\n'); + break; + case entity: + printf("entity "); + print_str(ils->content.literal.data, ils->content.literal.len); + putchar('\n'); + break; + case link: + case image: + printf("%s url=", ils->tag == link ? "link" : "image"); + print_str(ils->content.linkable.url, -1); + if (ils->content.linkable.title) { + printf(" title="); + print_str(ils->content.linkable.title, -1); + } + putchar('\n'); + print_inlines(ils->content.linkable.label, indent + 2); + break; + case strong: + printf("strong\n"); + print_inlines(ils->content.linkable.label, indent + 2); + break; + case emph: + printf("emph\n"); + print_inlines(ils->content.linkable.label, indent + 2); + break; + } + ils = ils->next; + } } diff --git a/src/stmd.h b/src/stmd.h index eb1b989..dc24235 100644 --- a/src/stmd.h +++ b/src/stmd.h @@ -105,19 +105,14 @@ extern block* add_child(block* parent, int block_type, int start_line, int start_column); void free_blocks(block* e); -block *stmd_parse_document(const char *buffer, size_t len); - -// FOR NOW: -void process_inlines(block* cur, reference** refmap); -void incorporate_line(gh_buf *ln, int line_number, block** curptr); -void finalize(block* b, int line_number); +extern block *stmd_parse_document(const unsigned char *buffer, size_t len); +extern block *stmd_parse_file(FILE *f); void print_inlines(inl* ils, int indent); void print_blocks(block* blk, int indent); -/* TODO */ -// int blocks_to_html(block* b, bstring* result, bool tight); -// int inlines_to_html(inl* b, bstring* result); +void blocks_to_html(gh_buf *html, block *b, bool tight); +void inlines_to_html(gh_buf *html, inl *b); void utf8proc_case_fold(gh_buf *dest, const unsigned char *str, int len); diff --git a/src/utf8.c b/src/utf8.c index 1a5df9e..e3f8dd3 100644 --- a/src/utf8.c +++ b/src/utf8.c @@ -1,6 +1,8 @@ #include -#include "bstrlib.h" -#include "debug.h" +#include +#include + +#include "stmd.h" static const int8_t utf8proc_utf8class[256] = { 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, diff --git a/src/utf8.h b/src/utf8.h deleted file mode 100644 index fe59a90..0000000 --- a/src/utf8.h +++ /dev/null @@ -1,6 +0,0 @@ -#include -#include "bstrlib.h" - -extern unsigned char * from_utf8(unsigned char * s, unsigned int *n); -extern int to_utf8(unsigned int c, bstring dest); -extern bstring case_fold(bstring source); -- cgit v1.2.3 From a7314deae649646f1f7ce5ede972641b5b62538c Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Wed, 3 Sep 2014 03:40:23 +0200 Subject: 338/103 --- Makefile | 4 +- src/blocks.c | 173 +++++++++++++++++----------------- src/buffer.c | 26 ++++- src/buffer.h | 2 +- src/html/houdini_href_e.c | 10 +- src/html/houdini_html_e.c | 10 +- src/html/html.c | 4 +- src/inlines.c | 235 ++++++++++++++++++---------------------------- src/print.c | 2 +- src/scanners.h | 28 +++--- src/scanners.re | 85 +++++++---------- src/stmd.h | 16 ++-- 12 files changed, 261 insertions(+), 334 deletions(-) (limited to 'Makefile') diff --git a/Makefile b/Makefile index d14a928..89ec68c 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ -CFLAGS=-ggdb3 -O0 -Wall -Wextra -std=c99 -Isrc $(OPTFLAGS) -LDFLAGS=-ggdb3 -O0 -Wall -Werror +CFLAGS=-ggdb3 -O0 -Wall -Wextra -Wno-unused-variable -std=c99 -Isrc $(OPTFLAGS) +LDFLAGS=-ggdb3 -O0 -Wall -Wno-unused-variable # -Werror SRCDIR=src DATADIR=data diff --git a/src/blocks.c b/src/blocks.c index 42f20db..94ff986 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -8,6 +8,8 @@ #include "scanners.h" #include "uthash.h" +#define peek_at(i, n) (i)->data[n] + static void incorporate_line(gh_buf *ln, int line_number, block** curptr); static void finalize(block* b, int line_number); @@ -27,7 +29,6 @@ static block* make_block(int tag, int start_line, int start_column) e->top = NULL; e->attributes.refmap = NULL; gh_buf_init(&e->string_content, 32); - e->string_pos = 0; e->inline_content = NULL; e->next = NULL; e->prev = NULL; @@ -80,10 +81,10 @@ static inline bool accepts_lines(int block_type) block_type 
== fenced_code); } -static void add_line(block* block, gh_buf *ln, int offset) +static void add_line(block* block, chunk *ch, int offset) { assert(block->open); - gh_buf_put(&block->string_content, ln->ptr + offset, ln->size - offset); + gh_buf_put(&block->string_content, ch->data + offset, ch->len - offset); } static void remove_trailing_blank_lines(gh_buf *ln) @@ -104,7 +105,7 @@ static void remove_trailing_blank_lines(gh_buf *ln) i = gh_buf_strchr(ln, '\n', i); if (i >= 0) - gh_buf_truncate(ln, i + 1); + gh_buf_truncate(ln, i); } // Check to see if a block ends with a blank line, descending @@ -162,12 +163,12 @@ static void finalize(block* b, int line_number) switch (b->tag) { case paragraph: pos = 0; - while (gh_buf_at(&b->string_content, b->string_pos) == '[' && - (pos = parse_reference(&b->string_content, b->string_pos, - b->top->attributes.refmap))) { - b->string_pos = pos; + while (gh_buf_at(&b->string_content, 0) == '[' && + (pos = parse_reference(&b->string_content, b->top->attributes.refmap))) { + + gh_buf_drop(&b->string_content, pos); } - if (is_blank(&b->string_content, b->string_pos)) { + if (is_blank(&b->string_content, 0)) { b->tag = reference_def; } break; @@ -179,14 +180,16 @@ static void finalize(block* b, int line_number) case fenced_code: // first line of contents becomes info - firstlinelen = gh_buf_strchr(&b->string_content, '\n', b->string_pos); + firstlinelen = gh_buf_strchr(&b->string_content, '\n', 0); + + gh_buf_init(&b->attributes.fenced_code_data.info, 0); gh_buf_set( &b->attributes.fenced_code_data.info, - b->string_content.ptr + b->string_pos, + b->string_content.ptr, firstlinelen ); - b->string_pos = firstlinelen + 1; + gh_buf_drop(&b->string_content, firstlinelen + 1); gh_buf_trim(&b->attributes.fenced_code_data.info); unescape_buffer(&b->attributes.fenced_code_data.info); @@ -281,7 +284,7 @@ void process_inlines(block* cur, reference** refmap) case paragraph: case atx_header: case setext_header: - cur->inline_content = 
parse_inlines(&cur->string_content, cur->string_pos, refmap); + cur->inline_content = parse_inlines(&cur->string_content, refmap); // MEM // gh_buf_free(&cur->string_content); break; @@ -300,19 +303,18 @@ void process_inlines(block* cur, reference** refmap) // Attempts to parse a list item marker (bullet or enumerated). // On success, returns length of the marker, and populates // data with the details. On failure, returns 0. -static int parse_list_marker(gh_buf *ln, int pos, - struct ListData ** dataptr) +static int parse_list_marker(chunk *input, int pos, struct ListData ** dataptr) { - char c; + unsigned char c; int startpos; struct ListData * data; startpos = pos; - c = gh_buf_at(ln, pos); + c = peek_at(input, pos); - if ((c == '*' || c == '-' || c == '+') && !scan_hrule(ln, pos)) { + if ((c == '*' || c == '-' || c == '+') && !scan_hrule(input, pos)) { pos++; - if (!isspace(gh_buf_at(ln, pos))) { + if (!isspace(peek_at(input, pos))) { return 0; } data = malloc(sizeof(struct ListData)); @@ -327,14 +329,14 @@ static int parse_list_marker(gh_buf *ln, int pos, int start = 0; do { - start = (10 * start) + (gh_buf_at(ln, pos) - '0'); + start = (10 * start) + (peek_at(input, pos) - '0'); pos++; - } while (isdigit(gh_buf_at(ln, pos))); + } while (isdigit(peek_at(input, pos))); - c = gh_buf_at(ln, pos); + c = peek_at(input, pos); if (c == '.' || c == ')') { pos++; - if (!isspace(gh_buf_at(ln, pos))) { + if (!isspace(peek_at(input, pos))) { return 0; } data = malloc(sizeof(struct ListData)); @@ -449,8 +451,26 @@ extern block *stmd_parse_document(const unsigned char *buffer, size_t len) return finalize_document(document, linenum); } +static void chop_trailing_hashtags(chunk *ch) +{ + int n; + + chunk_rtrim(ch); + n = ch->len - 1; + + // if string ends in #s, remove these: + while (n >= 0 && peek_at(ch, n) == '#') + n--; + + // the last # was escaped, so we include it. 
+ if (n >= 0 && peek_at(ch, n) == '\\') + n++; + + ch->len = n + 1; +} + // Process one line at a time, modifying a block. -static void incorporate_line(gh_buf *ln, int line_number, block** curptr) +static void incorporate_line(gh_buf *line, int line_number, block** curptr) { block* last_matched_container; int offset = 0; @@ -464,6 +484,10 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr) bool blank = false; int first_nonspace; int indent; + chunk input; + + input.data = line->ptr; + input.len = line->size; // container starts at the document root. container = cur->top; @@ -475,21 +499,19 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr) container = container->last_child; first_nonspace = offset; - while (gh_buf_at(ln, first_nonspace) == ' ') { + while (peek_at(&input, first_nonspace) == ' ') { first_nonspace++; } indent = first_nonspace - offset; - blank = gh_buf_at(ln, first_nonspace) == '\n'; + blank = peek_at(&input, first_nonspace) == '\n'; if (container->tag == block_quote) { - - matched = indent <= 3 && gh_buf_at(ln, first_nonspace) == '>'; + matched = indent <= 3 && peek_at(&input, first_nonspace) == '>'; if (matched) { offset = first_nonspace + 1; - if (gh_buf_at(ln, offset) == ' ') { + if (peek_at(&input, offset) == ' ') offset++; - } } else { all_matched = false; } @@ -526,7 +548,7 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr) // skip optional spaces of fence offset i = container->attributes.fenced_code_data.fence_offset; - while (i > 0 && gh_buf_at(ln, offset) == ' ') { + while (i > 0 && peek_at(&input, offset) == ' ') { offset++; i--; } @@ -564,15 +586,13 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr) container->tag != html_block) { first_nonspace = offset; - while (gh_buf_at(ln, first_nonspace) == ' ') { + while (peek_at(&input, first_nonspace) == ' ') first_nonspace++; - } indent = first_nonspace - offset; - blank = gh_buf_at(ln, 
first_nonspace) == '\n'; + blank = peek_at(&input, first_nonspace) == '\n'; if (indent >= CODE_INDENT) { - if (cur->tag != paragraph && !blank) { offset += CODE_INDENT; container = add_child(container, indented_code, line_number, offset + 1); @@ -580,76 +600,70 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr) break; } - } else if (gh_buf_at(ln, first_nonspace) == '>') { + } else if (peek_at(&input, first_nonspace) == '>') { offset = first_nonspace + 1; // optional following character - if (gh_buf_at(ln, offset) == ' ') { + if (peek_at(&input, offset) == ' ') offset++; - } container = add_child(container, block_quote, line_number, offset + 1); - } else if ((matched = scan_atx_header_start(ln, first_nonspace))) { + } else if ((matched = scan_atx_header_start(&input, first_nonspace))) { offset = first_nonspace + matched; container = add_child(container, atx_header, line_number, offset + 1); - int hashpos = gh_buf_strchr(ln, '#', first_nonspace); - assert(hashpos >= 0); - + int hashpos = chunk_strchr(&input, '#', first_nonspace); int level = 0; - while (gh_buf_at(ln, hashpos) == '#') { + + while (peek_at(&input, hashpos) == '#') { level++; hashpos++; } container->attributes.header_level = level; - } else if ((matched = scan_open_code_fence(ln, first_nonspace))) { + } else if ((matched = scan_open_code_fence(&input, first_nonspace))) { - container = add_child(container, fenced_code, line_number, - first_nonspace + 1); - container->attributes.fenced_code_data.fence_char = gh_buf_at(ln, - first_nonspace); + container = add_child(container, fenced_code, line_number, first_nonspace + 1); + container->attributes.fenced_code_data.fence_char = peek_at(&input, first_nonspace); container->attributes.fenced_code_data.fence_length = matched; - container->attributes.fenced_code_data.fence_offset = - first_nonspace - offset; + container->attributes.fenced_code_data.fence_offset = first_nonspace - offset; offset = first_nonspace + matched; - } else if 
((matched = scan_html_block_tag(ln, first_nonspace))) { + } else if ((matched = scan_html_block_tag(&input, first_nonspace))) { - container = add_child(container, html_block, line_number, - first_nonspace + 1); + container = add_child(container, html_block, line_number, first_nonspace + 1); // note, we don't adjust offset because the tag is part of the text } else if (container->tag == paragraph && - (lev = scan_setext_header_line(ln, first_nonspace)) && + (lev = scan_setext_header_line(&input, first_nonspace)) && // check that there is only one line in the paragraph: gh_buf_strrchr(&container->string_content, '\n', gh_buf_len(&container->string_content) - 2) < 0) { container->tag = setext_header; container->attributes.header_level = lev; - offset = gh_buf_len(ln) - 1; + offset = input.len - 1; } else if (!(container->tag == paragraph && !all_matched) && - (matched = scan_hrule(ln, first_nonspace))) { + (matched = scan_hrule(&input, first_nonspace))) { // it's only now that we know the line is not part of a setext header: container = add_child(container, hrule, line_number, first_nonspace + 1); finalize(container, line_number); container = container->parent; - offset = gh_buf_len(ln) - 1; + offset = input.len - 1; - } else if ((matched = parse_list_marker(ln, first_nonspace, &data))) { + } else if ((matched = parse_list_marker(&input, first_nonspace, &data))) { // compute padding: offset = first_nonspace + matched; i = 0; - while (i <= 5 && gh_buf_at(ln, offset + i) == ' ') { + while (i <= 5 && peek_at(&input, offset + i) == ' ') { i++; } // i = number of spaces after marker, up to 5 - if (i >= 5 || i < 1 || gh_buf_at(ln, offset) == '\n') { + if (i >= 5 || i < 1 || peek_at(&input, offset) == '\n') { data->padding = matched + 1; if (i > 0) { offset += 1; @@ -674,6 +688,7 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr) // add the list item container = add_child(container, list_item, line_number, first_nonspace + 1); + /* TODO: static */ 
container->attributes.list_data = *data; free(data); @@ -691,12 +706,11 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr) // appropriate container. first_nonspace = offset; - while (gh_buf_at(ln, first_nonspace) == ' ') { + while (peek_at(&input, first_nonspace) == ' ') first_nonspace++; - } indent = first_nonspace - offset; - blank = gh_buf_at(ln, first_nonspace) == '\n'; + blank = peek_at(&input, first_nonspace) == '\n'; // block quote lines are never blank as they start with > // and we don't count blanks in fenced code for purposes of tight/loose @@ -721,13 +735,12 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr) cur->tag == paragraph && gh_buf_len(&cur->string_content) > 0) { - add_line(cur, ln, offset); + add_line(cur, &input, offset); } else { // not a lazy continuation // finalize any blocks that were not matched and set cur to container: while (cur != last_matched_container) { - finalize(cur, line_number); cur = cur->parent; assert(cur != NULL); @@ -735,58 +748,46 @@ static void incorporate_line(gh_buf *ln, int line_number, block** curptr) if (container->tag == indented_code) { - add_line(container, ln, offset); + add_line(container, &input, offset); } else if (container->tag == fenced_code) { matched = (indent <= 3 - && gh_buf_at(ln, first_nonspace) == container->attributes.fenced_code_data.fence_char) - && scan_close_code_fence(ln, first_nonspace, + && peek_at(&input, first_nonspace) == container->attributes.fenced_code_data.fence_char) + && scan_close_code_fence(&input, first_nonspace, container->attributes.fenced_code_data.fence_length); if (matched) { // if closing fence, don't add line to container; instead, close it: finalize(container, line_number); container = container->parent; // back up to parent } else { - add_line(container, ln, offset); + add_line(container, &input, offset); } } else if (container->tag == html_block) { - add_line(container, ln, offset); + add_line(container, &input, 
offset); } else if (blank) { // ??? do nothing } else if (container->tag == atx_header) { - // chop off trailing ###s...use a scanner? - gh_buf_trim(ln); - int p = gh_buf_len(ln) - 1; - - // if string ends in #s, remove these: - while (gh_buf_at(ln, p) == '#') { - p--; - } - if (gh_buf_at(ln, p) == '\\') { - // the last # was escaped, so we include it. - p++; - } - gh_buf_truncate(ln, p + 1); - add_line(container, ln, first_nonspace); + chop_trailing_hashtags(&input); + add_line(container, &input, first_nonspace); finalize(container, line_number); container = container->parent; } else if (accepts_lines(container->tag)) { - add_line(container, ln, first_nonspace); + add_line(container, &input, first_nonspace); } else if (container->tag != hrule && container->tag != setext_header) { // create paragraph container for line container = add_child(container, paragraph, line_number, first_nonspace + 1); - add_line(container, ln, first_nonspace); + add_line(container, &input, first_nonspace); } else { assert(false); diff --git a/src/buffer.c b/src/buffer.c index cfc6a7e..dc4a405 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -95,7 +95,7 @@ void gh_buf_clear(gh_buf *buf) int gh_buf_set(gh_buf *buf, const unsigned char *data, int len) { - if (len == 0 || data == NULL) { + if (len <= 0 || data == NULL) { gh_buf_clear(buf); } else { if (data != buf->ptr) { @@ -125,6 +125,9 @@ int gh_buf_putc(gh_buf *buf, int c) int gh_buf_put(gh_buf *buf, const unsigned char *data, int len) { + if (len <= 0) + return 0; + ENSURE_SIZE(buf, buf->size + len + 1); memmove(buf->ptr + buf->size, data, len); buf->size += len; @@ -272,15 +275,28 @@ void gh_buf_truncate(gh_buf *buf, int len) } } +void gh_buf_drop(gh_buf *buf, int n) +{ + if (n > 0) { + buf->size = buf->size - n; + if (buf->size) + memmove(buf->ptr, buf->ptr + n, buf->size); + + buf->ptr[buf->size] = '\0'; + } +} + void gh_buf_trim(gh_buf *buf) { - /* TODO: leading whitespace? 
*/ - /* + int i = 0; + + if (!buf->size) + return; + while (i < buf->size && isspace(buf->ptr[i])) i++; - gh_buf_truncate(buf, i); - */ + gh_buf_drop(buf, i); /* rtrim */ while (buf->size > 0) { diff --git a/src/buffer.h b/src/buffer.h index 422ef02..0d5143e 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -105,8 +105,8 @@ extern void gh_buf_clear(gh_buf *buf); int gh_buf_strchr(const gh_buf *buf, int c, int pos); int gh_buf_strrchr(const gh_buf *buf, int c, int pos); +void gh_buf_drop(gh_buf *buf, int n); void gh_buf_truncate(gh_buf *buf, int len); -void gh_buf_ltruncate(gh_buf *buf, int len); void gh_buf_trim(gh_buf *buf); #endif diff --git a/src/html/houdini_href_e.c b/src/html/houdini_href_e.c index 59fe850..b2a7d79 100644 --- a/src/html/houdini_href_e.c +++ b/src/html/houdini_href_e.c @@ -62,16 +62,8 @@ houdini_escape_href(gh_buf *ob, const uint8_t *src, size_t size) while (i < size && HREF_SAFE[src[i]] != 0) i++; - if (likely(i > org)) { - if (unlikely(org == 0)) { - if (i >= size) - return 0; - - gh_buf_grow(ob, HOUDINI_ESCAPED_SIZE(size)); - } - + if (likely(i > org)) gh_buf_put(ob, src + org, i - org); - } /* escaping */ if (i >= size) diff --git a/src/html/houdini_html_e.c b/src/html/houdini_html_e.c index 316c5ce..5cdd3dd 100644 --- a/src/html/houdini_html_e.c +++ b/src/html/houdini_html_e.c @@ -54,16 +54,8 @@ houdini_escape_html0(gh_buf *ob, const uint8_t *src, size_t size, int secure) while (i < size && (esc = HTML_ESCAPE_TABLE[src[i]]) == 0) i++; - if (i > org) { - if (unlikely(org == 0)) { - if (i >= size) - return 0; - - gh_buf_grow(ob, HOUDINI_ESCAPED_SIZE(size)); - } - + if (i > org) gh_buf_put(ob, src + org, i - org); - } /* escaping */ if (unlikely(i >= size)) diff --git a/src/html/html.c b/src/html/html.c index 2f160ca..27ffe58 100644 --- a/src/html/html.c +++ b/src/html/html.c @@ -68,7 +68,7 @@ void blocks_to_html(gh_buf *html, block *b, bool tight) cr(html); gh_buf_puts(html, "
  • "); blocks_to_html(html, b->children, tight); - gh_buf_trim(html); + gh_buf_trim(html); /* TODO: rtrim */ gh_buf_puts(html, "
  • "); cr(html); break; @@ -106,7 +106,7 @@ void blocks_to_html(gh_buf *html, block *b, bool tight) cr(html); gh_buf_puts(html, "
    ");
     				escape_html(html, b->string_content.ptr, b->string_content.size);
    -				gh_buf_puts(html, "
    "); + gh_buf_puts(html, ""); cr(html); break; diff --git a/src/inlines.c b/src/inlines.c index 7b48ad9..ef27a24 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -9,10 +9,10 @@ #include "scanners.h" typedef struct Subject { - const gh_buf *buffer; - int pos; - reference** reference_map; - int label_nestlevel; + chunk input; + int pos; + int label_nestlevel; + reference** reference_map; } subject; reference* lookup_reference(reference** refmap, chunk *label); @@ -27,12 +27,16 @@ inline static void chunk_trim(chunk *c); inline static chunk chunk_literal(const char *data); inline static chunk chunk_buf_detach(gh_buf *buf); -inline static chunk chunk_buf(const gh_buf *buf, int pos, int len); +inline static chunk chunk_dup(const chunk *ch, int pos, int len); static inl *parse_chunk_inlines(chunk *chunk, reference** refmap); static inl *parse_inlines_while(subject* subj, int (*f)(subject*)); static int parse_inline(subject* subj, inl ** last); +static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap); +static void subject_from_buf(subject *e, gh_buf *buffer, reference** refmap); +static int subject_find_special_char(subject *subj); + extern void free_reference(reference *ref) { free(ref->label); free(ref->url); @@ -101,10 +105,12 @@ extern reference* make_reference(chunk *label, chunk *url, chunk *title) extern void add_reference(reference** refmap, reference* ref) { reference * t = NULL; - HASH_FIND(hh, *refmap, (char*)ref->label, (unsigned)strlen(ref->label), t); + const char *label = (const char *)ref->label; + + HASH_FIND(hh, *refmap, label, strlen(label), t); if (t == NULL) { - HASH_ADD_KEYPTR(hh, *refmap, (char*)ref->label, (unsigned)strlen(ref->label), ref); + HASH_ADD_KEYPTR(hh, *refmap, label, strlen(label), ref); } else { free_reference(ref); // we free this now since it won't be in the refmap } @@ -210,87 +216,49 @@ inline static inl* append_inlines(inl* a, inl* b) return a; } -// Make a 'subject' from an input string. 
-static void init_subject(subject *e, gh_buf *buffer, int input_pos, reference** refmap) +static void subject_from_buf(subject *e, gh_buf *buffer, reference** refmap) { - e->buffer = buffer; - e->pos = input_pos; + e->input.data = buffer->ptr; + e->input.len = buffer->size; + e->input.alloc = 0; + e->pos = 0; e->label_nestlevel = 0; e->reference_map = refmap; -} - -inline static int isbacktick(int c) -{ - return (c == '`'); -} - -inline static void chunk_free(chunk *c) -{ - if (c->alloc) - free((char *)c->data); - - c->data = NULL; - c->alloc = 0; - c->len = 0; -} - -inline static void chunk_trim(chunk *c) -{ - while (c->len && isspace(c->data[0])) { - c->data++; - c->len--; - } - - while (c->len > 0) { - if (!isspace(c->data[c->len - 1])) - break; - c->len--; - } + chunk_rtrim(&e->input); } -inline static unsigned char *chunk_to_cstr(chunk *c) +static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap) { - unsigned char *str; - - str = malloc(c->len + 1); - memcpy(str, c->data, c->len); - str[c->len] = 0; + e->input.data = chunk->data; + e->input.len = chunk->len; + e->input.alloc = 0; + e->pos = 0; + e->label_nestlevel = 0; + e->reference_map = refmap; - return str; + chunk_rtrim(&e->input); } -inline static chunk chunk_literal(const char *data) +inline static int isbacktick(int c) { - chunk c = {data, data ? strlen(data) : 0, 0}; - return c; + return (c == '`'); } -inline static chunk chunk_buf(const gh_buf *buf, int pos, int len) +static inline unsigned char peek_char(subject *subj) { - chunk c = {buf->ptr + pos, len, 0}; - return c; + return (subj->pos < subj->input.len) ? subj->input.data[subj->pos] : 0; } -inline static chunk chunk_buf_detach(gh_buf *buf) +static inline unsigned char peek_at(subject *subj, int pos) { - chunk c; - - c.len = buf->size; - c.data = gh_buf_detach(buf); - c.alloc = 1; - - return c; + return subj->input.data[pos]; } -// Return the next character in the subject, without advancing. 
-// Return 0 if at the end of the subject. -#define peek_char(subj) gh_buf_at((subj)->buffer, (subj)->pos) - // Return true if there are more characters in the subject. inline static int is_eof(subject* subj) { - return (subj->pos >= gh_buf_len(subj->buffer)); + return (subj->pos >= subj->input.len); } // Advance the subject. Doesn't check for eof. @@ -308,7 +276,7 @@ inline static chunk take_while(subject* subj, int (*f)(int)) len++; } - return chunk_buf(subj->buffer, startpos, len); + return chunk_dup(&subj->input, startpos, len); } // Try to process a backtick code span that began with a @@ -388,7 +356,7 @@ static inl* handle_backticks(subject *subj) } else { gh_buf buf = GH_BUF_INIT; - gh_buf_set(&buf, subj->buffer->ptr + startpos, endpos - startpos - openticks.len); + gh_buf_set(&buf, subj->input.data + startpos, endpos - startpos - openticks.len); gh_buf_trim(&buf); normalize_whitespace(&buf); @@ -404,7 +372,7 @@ static int scan_delims(subject* subj, char c, bool * can_open, bool * can_close) char char_before, char_after; int startpos = subj->pos; - char_before = subj->pos == 0 ? '\n' : gh_buf_at(subj->buffer, subj->pos - 1); + char_before = subj->pos == 0 ? 
'\n' : peek_at(subj, subj->pos - 1); while (peek_char(subj) == c) { numdelims++; advance(subj); @@ -439,7 +407,7 @@ static inl* handle_strong_emph(subject* subj, char c) numdelims = scan_delims(subj, c, &can_open, &can_close); subj->pos += numdelims; - new = make_str(chunk_buf(subj->buffer, subj->pos - numdelims, numdelims)); + new = make_str(chunk_dup(&subj->input, subj->pos - numdelims, numdelims)); *last = new; first_head = new; result = new; @@ -488,7 +456,7 @@ static inl* handle_strong_emph(subject* subj, char c) numdelims = scan_delims(subj, c, &can_open, &can_close); if (can_close && numdelims >= 1 && numdelims <= 3 && numdelims != first_close_delims) { - new = make_str(chunk_buf(subj->buffer, subj->pos, numdelims)); + new = make_str(chunk_dup(&subj->input, subj->pos, numdelims)); append_inlines(*last, new); *last = new; if (first_close_delims == 1 && numdelims > 2) { @@ -554,7 +522,7 @@ static inl* handle_backslash(subject *subj) unsigned char nextchar = peek_char(subj); if (ispunct(nextchar)) { // only ascii symbols and newline can be escaped advance(subj); - return make_str(chunk_buf(subj->buffer, subj->pos - 1, 1)); + return make_str(chunk_dup(&subj->input, subj->pos - 1, 1)); } else if (nextchar == '\n') { advance(subj); return make_linebreak(); @@ -569,9 +537,9 @@ static inl* handle_entity(subject* subj) { int match; inl *result; - match = scan_entity(subj->buffer, subj->pos); + match = scan_entity(&subj->input, subj->pos); if (match) { - result = make_entity(chunk_buf(subj->buffer, subj->pos, match)); + result = make_entity(chunk_dup(&subj->input, subj->pos, match)); subj->pos += match; } else { advance(subj); @@ -584,15 +552,13 @@ static inl* handle_entity(subject* subj) // Returns an inline sequence consisting of str and entity elements. 
static inl *make_str_with_entities(chunk *content) { - inl * result = NULL; - inl * new; + inl *result = NULL; + inl *new; int searchpos; char c; subject subj; - gh_buf content_buf = GH_BUF_INIT; - gh_buf_set(&content_buf, content->data, content->len); - init_subject(&subj, &content_buf, 0, NULL); + subject_from_chunk(&subj, content, NULL); while ((c = peek_char(&subj))) { switch (c) { @@ -600,18 +566,13 @@ static inl *make_str_with_entities(chunk *content) new = handle_entity(&subj); break; default: - searchpos = gh_buf_strchr(subj.buffer, '&', subj.pos); - if (searchpos < 0) { - searchpos = gh_buf_len(subj.buffer); - } - - new = make_str(chunk_buf(subj.buffer, subj.pos, searchpos - subj.pos)); + searchpos = chunk_strchr(&subj.input, '&', subj.pos); + new = make_str(chunk_dup(&subj.input, subj.pos, searchpos - subj.pos)); subj.pos = searchpos; } result = append_inlines(result, new); } - gh_buf_free(&content_buf); return result; } @@ -678,9 +639,9 @@ static inl* handle_pointy_brace(subject* subj) advance(subj); // advance past first < // first try to match a URL autolink - matchlen = scan_autolink_uri(subj->buffer, subj->pos); + matchlen = scan_autolink_uri(&subj->input, subj->pos); if (matchlen > 0) { - contents = chunk_buf(subj->buffer, subj->pos, matchlen - 1); + contents = chunk_dup(&subj->input, subj->pos, matchlen - 1); subj->pos += matchlen; return make_link( @@ -691,11 +652,11 @@ static inl* handle_pointy_brace(subject* subj) } // next try to match an email autolink - matchlen = scan_autolink_email(subj->buffer, subj->pos); + matchlen = scan_autolink_email(&subj->input, subj->pos); if (matchlen > 0) { gh_buf mail_url = GH_BUF_INIT; - contents = chunk_buf(subj->buffer, subj->pos, matchlen - 1); + contents = chunk_dup(&subj->input, subj->pos, matchlen - 1); subj->pos += matchlen; gh_buf_puts(&mail_url, "mailto:"); @@ -709,9 +670,9 @@ static inl* handle_pointy_brace(subject* subj) } // finally, try to match an html tag - matchlen = scan_html_tag(subj->buffer, 
subj->pos); + matchlen = scan_html_tag(&subj->input, subj->pos); if (matchlen > 0) { - contents = chunk_buf(subj->buffer, subj->pos - 1, matchlen + 1); + contents = chunk_dup(&subj->input, subj->pos - 1, matchlen + 1); subj->pos += matchlen; return make_raw_html(contents); } @@ -776,12 +737,7 @@ static int link_label(subject* subj, chunk *raw_label) } } if (c == ']') { - *raw_label = chunk_buf( - subj->buffer, - startpos + 1, - subj->pos - (startpos + 1) - ); - + *raw_label = chunk_dup(&subj->input, startpos + 1, subj->pos - (startpos + 1)); subj->label_nestlevel = 0; advance(subj); // advance past ] return 1; @@ -813,25 +769,25 @@ static inl* handle_left_bracket(subject* subj) if (found_label) { if (peek_char(subj) == '(' && - ((sps = scan_spacechars(subj->buffer, subj->pos + 1)) > -1) && - ((n = scan_link_url(subj->buffer, subj->pos + 1 + sps)) > -1)) { + ((sps = scan_spacechars(&subj->input, subj->pos + 1)) > -1) && + ((n = scan_link_url(&subj->input, subj->pos + 1 + sps)) > -1)) { // try to parse an explicit link: starturl = subj->pos + 1 + sps; // after ( endurl = starturl + n; - starttitle = endurl + scan_spacechars(subj->buffer, endurl); + starttitle = endurl + scan_spacechars(&subj->input, endurl); // ensure there are spaces btw url and title endtitle = (starttitle == endurl) ? 
starttitle : - starttitle + scan_link_title(subj->buffer, starttitle); + starttitle + scan_link_title(&subj->input, starttitle); - endall = endtitle + scan_spacechars(subj->buffer, endtitle); + endall = endtitle + scan_spacechars(&subj->input, endtitle); - if (gh_buf_at(subj->buffer, endall) == ')') { + if (peek_at(subj, endall) == ')') { subj->pos = endall + 1; - url = chunk_buf(subj->buffer, starturl, endurl - starturl); - title = chunk_buf(subj->buffer, starttitle, endtitle - starttitle); + url = chunk_dup(&subj->input, starturl, endurl - starturl); + title = chunk_dup(&subj->input, starttitle, endtitle - starttitle); lab = parse_chunk_inlines(&rawlabel, NULL); return make_link(lab, url, title); @@ -850,7 +806,7 @@ static inl* handle_left_bracket(subject* subj) // Check for reference link. // First, see if there's another label: - subj->pos = subj->pos + scan_spacechars(subj->buffer, endlabel); + subj->pos = subj->pos + scan_spacechars(&subj->input, endlabel); reflabel = rawlabel; // if followed by a nonempty link label, we change reflabel to it: @@ -892,8 +848,8 @@ static inl* handle_newline(subject *subj) advance(subj); } if (nlpos > 1 && - gh_buf_at(subj->buffer, nlpos - 1) == ' ' && - gh_buf_at(subj->buffer, nlpos - 2) == ' ') { + peek_at(subj, nlpos - 1) == ' ' && + peek_at(subj, nlpos - 2) == ' ') { return make_linebreak(); } else { return make_softbreak(); @@ -917,30 +873,22 @@ extern inl* parse_inlines_while(subject* subj, int (*f)(subject*)) inl *parse_chunk_inlines(chunk *chunk, reference** refmap) { - inl *result; subject subj; - gh_buf full_chunk = GH_BUF_INIT; - - gh_buf_set(&full_chunk, chunk->data, chunk->len); - init_subject(&subj, &full_chunk, 0, refmap); - result = parse_inlines_while(&subj, not_eof); - - gh_buf_free(&full_chunk); - return result; + subject_from_chunk(&subj, chunk, refmap); + return parse_inlines_while(&subj, not_eof); } -static int find_special_char(subject *subj) +static int subject_find_special_char(subject *subj) { int n = 
subj->pos + 1; - int size = (int)gh_buf_len(subj->buffer); - while (n < size) { - if (strchr("\n\\`&_*[]buffer, n))) + while (n < subj->input.len) { + if (strchr("\n\\`&_*[]input.data[n])) return n; n++; } - return -1; + return subj->input.len; } // Parse an inline, advancing subject, and add it to last element. @@ -973,11 +921,13 @@ static int parse_inline(subject* subj, inl ** last) new = handle_pointy_brace(subj); break; case '_': - if (subj->pos > 0 && (isalnum(gh_buf_at(subj->buffer, subj->pos - 1)) || - gh_buf_at(subj->buffer, subj->pos - 1) == '_')) { - new = make_str(chunk_literal("_")); - advance(subj); - break; + if (subj->pos > 0) { + unsigned char prev = peek_at(subj, subj->pos - 1); + if (isalnum(prev) || prev == '_') { + new = make_str(chunk_literal("_")); + advance(subj); + break; + } } new = handle_strong_emph(subj, '_'); @@ -1002,18 +952,13 @@ static int parse_inline(subject* subj, inl ** last) } break; default: - text_literal: - endpos = find_special_char(subj); - if (endpos < 0) { - endpos = gh_buf_len(subj->buffer); - } - - contents = chunk_buf(subj->buffer, subj->pos, endpos - subj->pos); + endpos = subject_find_special_char(subj); + contents = chunk_dup(&subj->input, subj->pos, endpos - subj->pos); subj->pos = endpos; // if we're at a newline, strip trailing spaces. if (peek_char(subj) == '\n') { - chunk_trim(&contents); + chunk_rtrim(&contents); } new = make_str(contents); @@ -1026,10 +971,10 @@ static int parse_inline(subject* subj, inl ** last) return 1; } -extern inl* parse_inlines(gh_buf *input, int input_pos, reference** refmap) +extern inl* parse_inlines(gh_buf *input, reference** refmap) { subject subj; - init_subject(&subj, input, input_pos, refmap); + subject_from_buf(&subj, input, refmap); return parse_inlines_while(&subj, not_eof); } @@ -1048,7 +993,7 @@ void spnl(subject* subj) // Modify refmap if a reference is encountered. // Return 0 if no reference found, otherwise position of subject // after reference is parsed. 
-extern int parse_reference(gh_buf *input, int input_pos, reference** refmap) +extern int parse_reference(gh_buf *input, reference** refmap) { subject subj; @@ -1058,9 +1003,9 @@ extern int parse_reference(gh_buf *input, int input_pos, reference** refmap) int matchlen = 0; int beforetitle; - reference * new = NULL; + reference *new = NULL; - init_subject(&subj, input, input_pos, NULL); + subject_from_buf(&subj, input, NULL); // parse label: if (!link_label(&subj, &lab)) @@ -1075,9 +1020,9 @@ extern int parse_reference(gh_buf *input, int input_pos, reference** refmap) // parse link url: spnl(&subj); - matchlen = scan_link_url(subj.buffer, subj.pos); + matchlen = scan_link_url(&subj.input, subj.pos); if (matchlen) { - url = chunk_buf(subj.buffer, subj.pos, matchlen); + url = chunk_dup(&subj.input, subj.pos, matchlen); subj.pos += matchlen; } else { return 0; @@ -1086,9 +1031,9 @@ extern int parse_reference(gh_buf *input, int input_pos, reference** refmap) // parse optional link_title beforetitle = subj.pos; spnl(&subj); - matchlen = scan_link_title(subj.buffer, subj.pos); + matchlen = scan_link_title(&subj.input, subj.pos); if (matchlen) { - title = chunk_buf(subj.buffer, subj.pos, matchlen); + title = chunk_dup(&subj.input, subj.pos, matchlen); subj.pos += matchlen; } else { subj.pos = beforetitle; diff --git a/src/print.c b/src/print.c index 0a87925..c262995 100644 --- a/src/print.c +++ b/src/print.c @@ -9,7 +9,7 @@ static void print_str(const unsigned char *s, int len) int i; if (len < 0) - len = strlen(s); + len = strlen((char *)s); putchar('"'); for (i = 0; i < len; ++i) { diff --git a/src/scanners.h b/src/scanners.h index b6e586b..f96c42d 100644 --- a/src/scanners.h +++ b/src/scanners.h @@ -1,15 +1,15 @@ -#include "buffer.h" +#include "stmd.h" -int scan_autolink_uri(const gh_buf *s, int pos); -int scan_autolink_email(const gh_buf *s, int pos); -int scan_html_tag(const gh_buf *s, int pos); -int scan_html_block_tag(const gh_buf *s, int pos); -int 
scan_link_url(const gh_buf *s, int pos); -int scan_link_title(const gh_buf *s, int pos); -int scan_spacechars(const gh_buf *s, int pos); -int scan_atx_header_start(const gh_buf *s, int pos); -int scan_setext_header_line(const gh_buf *s, int pos); -int scan_hrule(const gh_buf *s, int pos); -int scan_open_code_fence(const gh_buf *s, int pos); -int scan_close_code_fence(const gh_buf *s, int pos, int len); -int scan_entity(const gh_buf *s, int pos); +int scan_autolink_uri(chunk *c, int offset); +int scan_autolink_email(chunk *c, int offset); +int scan_html_tag(chunk *c, int offset); +int scan_html_block_tag(chunk *c, int offset); +int scan_link_url(chunk *c, int offset); +int scan_link_title(chunk *c, int offset); +int scan_spacechars(chunk *c, int offset); +int scan_atx_header_start(chunk *c, int offset); +int scan_setext_header_line(chunk *c, int offset); +int scan_hrule(chunk *c, int offset); +int scan_open_code_fence(chunk *c, int offset); +int scan_close_code_fence(chunk *c, int offset, int len); +int scan_entity(chunk *c, int offset); diff --git a/src/scanners.re b/src/scanners.re index 7323ef9..5ac7c15 100644 --- a/src/scanners.re +++ b/src/scanners.re @@ -1,8 +1,15 @@ -#include "buffer.h" +#include "scanners.h" + +#define SCAN_DATA \ + const unsigned char *marker = NULL; \ + const unsigned char *p = c->data + offset; \ + const unsigned char *start = p; \ + const unsigned char *end = c->data + c->len /*!re2c re2c:define:YYCTYPE = "unsigned char"; re2c:define:YYCURSOR = p; + re2c:define:YYLIMIT = end; re2c:define:YYMARKER = marker; re2c:define:YYCTXMARKER = marker; re2c:yyfill:enable = 0; @@ -55,11 +62,9 @@ */ // Try to match URI autolink after first <, returning number of chars matched. 
-extern int scan_autolink_uri(const gh_buf *s, int pos) +extern int scan_autolink_uri(chunk *c, int offset) { - unsigned char * marker = NULL; - unsigned char * p = &(s->ptr[pos]); - unsigned char * start = p; + SCAN_DATA; /*!re2c scheme [:]([^\x00-\x20<>\\]|escaped_char)*[>] { return (p - start); } .? { return 0; } @@ -67,11 +72,9 @@ extern int scan_autolink_uri(const gh_buf *s, int pos) } // Try to match email autolink after first <, returning num of chars matched. -extern int scan_autolink_email(const gh_buf *s, int pos) +extern int scan_autolink_email(chunk *c, int offset) { - unsigned char * marker = NULL; - unsigned char * p = &(s->ptr[pos]); - unsigned char * start = p; + SCAN_DATA; /*!re2c [a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+ [@] @@ -83,11 +86,9 @@ extern int scan_autolink_email(const gh_buf *s, int pos) } // Try to match an HTML tag after first <, returning num of chars matched. -extern int scan_html_tag(const gh_buf *s, int pos) +extern int scan_html_tag(chunk *c, int offset) { - unsigned char * marker = NULL; - unsigned char * p = &(s->ptr[pos]); - unsigned char * start = p; + SCAN_DATA; /*!re2c htmltag { return (p - start); } .? { return 0; } @@ -96,11 +97,9 @@ extern int scan_html_tag(const gh_buf *s, int pos) // Try to match an HTML block tag including first <, // returning num of chars matched. -extern int scan_html_block_tag(const gh_buf *s, int pos) +extern int scan_html_block_tag(chunk *c, int offset) { - unsigned char * marker = NULL; - unsigned char * p = &(s->ptr[pos]); - unsigned char * start = p; + SCAN_DATA; /*!re2c [<] [/] blocktagname (spacechar | [>]) { return (p - start); } [<] blocktagname (spacechar | [/>]) { return (p - start); } @@ -113,11 +112,9 @@ extern int scan_html_block_tag(const gh_buf *s, int pos) // This may optionally be contained in <..>; otherwise // whitespace and unbalanced right parentheses aren't allowed. // Newlines aren't ever allowed. 
-extern int scan_link_url(const gh_buf *s, int pos) +extern int scan_link_url(chunk *c, int offset) { - unsigned char * marker = NULL; - unsigned char * p = &(s->ptr[pos]); - unsigned char * start = p; + SCAN_DATA; /*!re2c [ \n]* [<] ([^<>\n\\\x00] | escaped_char | [\\])* [>] { return (p - start); } [ \n]* (reg_char+ | escaped_char | in_parens_nosp)* { return (p - start); } @@ -128,11 +125,9 @@ extern int scan_link_url(const gh_buf *s, int pos) // Try to match a link title (in single quotes, in double quotes, or // in parentheses), returning number of chars matched. Allow one // level of internal nesting (quotes within quotes). -extern int scan_link_title(const gh_buf *s, int pos) +extern int scan_link_title(chunk *c, int offset) { - unsigned char * marker = NULL; - unsigned char * p = &(s->ptr[pos]); - unsigned char * start = p; + SCAN_DATA; /*!re2c ["] (escaped_char|[^"\x00])* ["] { return (p - start); } ['] (escaped_char|[^'\x00])* ['] { return (p - start); } @@ -142,10 +137,9 @@ extern int scan_link_title(const gh_buf *s, int pos) } // Match space characters, including newlines. -extern int scan_spacechars(const gh_buf *s, int pos) +extern int scan_spacechars(chunk *c, int offset) { - unsigned char * p = &(s->ptr[pos]); - unsigned char * start = p; + SCAN_DATA; /*!re2c [ \t\n]* { return (p - start); } . { return 0; } @@ -153,11 +147,9 @@ extern int scan_spacechars(const gh_buf *s, int pos) } // Match ATX header start. -extern int scan_atx_header_start(const gh_buf *s, int pos) +extern int scan_atx_header_start(chunk *c, int offset) { - unsigned char * marker = NULL; - unsigned char * p = &(s->ptr[pos]); - unsigned char * start = p; + SCAN_DATA; /*!re2c [#]{1,6} ([ ]+|[\n]) { return (p - start); } .? { return 0; } @@ -166,10 +158,9 @@ extern int scan_atx_header_start(const gh_buf *s, int pos) // Match sexext header line. Return 1 for level-1 header, // 2 for level-2, 0 for no match. 
-extern int scan_setext_header_line(const gh_buf *s, int pos) +extern int scan_setext_header_line(chunk *c, int offset) { - unsigned char * marker = NULL; - unsigned char * p = &(s->ptr[pos]); + SCAN_DATA; /*!re2c [=]+ [ ]* [\n] { return 1; } [-]+ [ ]* [\n] { return 2; } @@ -180,11 +171,9 @@ extern int scan_setext_header_line(const gh_buf *s, int pos) // Scan a horizontal rule line: "...three or more hyphens, asterisks, // or underscores on a line by themselves. If you wish, you may use // spaces between the hyphens or asterisks." -extern int scan_hrule(const gh_buf *s, int pos) +extern int scan_hrule(chunk *c, int offset) { - unsigned char * marker = NULL; - unsigned char * p = &(s->ptr[pos]); - unsigned char * start = p; + SCAN_DATA; /*!re2c ([*][ ]*){3,} [ \t]* [\n] { return (p - start); } ([_][ ]*){3,} [ \t]* [\n] { return (p - start); } @@ -194,11 +183,9 @@ extern int scan_hrule(const gh_buf *s, int pos) } // Scan an opening code fence. -extern int scan_open_code_fence(const gh_buf *s, int pos) +extern int scan_open_code_fence(chunk *c, int offset) { - unsigned char * marker = NULL; - unsigned char * p = &(s->ptr[pos]); - unsigned char * start = p; + SCAN_DATA; /*!re2c [`]{3,} / [^`\n\x00]*[\n] { return (p - start); } [~]{3,} / [^~\n\x00]*[\n] { return (p - start); } @@ -207,11 +194,9 @@ extern int scan_open_code_fence(const gh_buf *s, int pos) } // Scan a closing code fence with length at least len. -extern int scan_close_code_fence(const gh_buf *s, int pos, int len) +extern int scan_close_code_fence(chunk *c, int offset, int len) { - unsigned char * marker = NULL; - unsigned char * p = &(s->ptr[pos]); - unsigned char * start = p; + SCAN_DATA; /*!re2c ([`]{3,} | [~]{3,}) / spacechar* [\n] { if (p - start > len) { @@ -225,11 +210,9 @@ extern int scan_close_code_fence(const gh_buf *s, int pos, int len) // Scans an entity. // Returns number of chars matched. 
-extern int scan_entity(const gh_buf *s, int pos) +extern int scan_entity(chunk *c, int offset) { - unsigned char * marker = NULL; - unsigned char * p = &(s->ptr[pos]); - unsigned char * start = p; + SCAN_DATA; /*!re2c [&] ([#] ([Xx][A-Fa-f0-9]{1,8}|[0-9]{1,8}) |[A-Za-z][A-Za-z0-9]{1,31} ) [;] { return (p - start); } diff --git a/src/stmd.h b/src/stmd.h index 3e284bd..4a3c399 100644 --- a/src/stmd.h +++ b/src/stmd.h @@ -1,17 +1,15 @@ +#ifndef _STDMD_H_ +#define _STDMD_H_ + #include #include #include "buffer.h" +#include "chunk.h" #include "uthash.h" #define VERSION "0.1" #define CODE_INDENT 4 -typedef struct { - const unsigned char *data; - int len; - int alloc; -} chunk; - typedef struct Inline { enum { INL_STRING, INL_SOFTBREAK, INL_LINEBREAK, INL_CODE, INL_RAW_HTML, INL_ENTITY, INL_EMPH, INL_STRONG, INL_LINK, INL_IMAGE } tag; @@ -79,7 +77,6 @@ typedef struct Block { struct Block* parent; struct Block* top; gh_buf string_content; - int string_pos; inl* inline_content; union { struct ListData list_data; @@ -91,10 +88,10 @@ typedef struct Block { struct Block * prev; } block; -inl* parse_inlines(gh_buf *input, int input_pos, reference** refmap); +inl* parse_inlines(gh_buf *input, reference** refmap); void free_inlines(inl* e); -int parse_reference(gh_buf *input, int input_pos, reference** refmap); +int parse_reference(gh_buf *input, reference** refmap); void free_reference(reference *ref); void free_reference_map(reference **refmap); @@ -117,3 +114,4 @@ void inlines_to_html(gh_buf *html, inl *b); void utf8proc_case_fold(gh_buf *dest, const unsigned char *str, int len); +#endif -- cgit v1.2.3 From 3ec98f55bd78572195f355a2ccb891df1c91b798 Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Thu, 4 Sep 2014 18:11:40 +0200 Subject: Default Makefile settings --- Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Makefile') diff --git a/Makefile b/Makefile index 89ec68c..ed4ddd5 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ -CFLAGS=-ggdb3 
-O0 -Wall -Wextra -Wno-unused-variable -std=c99 -Isrc $(OPTFLAGS) -LDFLAGS=-ggdb3 -O0 -Wall -Wno-unused-variable # -Werror +CFLAGS=-g -O3 -Wall -Wextra -Wno-unused-variable -std=c99 -Isrc $(OPTFLAGS) +LDFLAGS=-g -O3 -Wall -Wno-unused-variable # -Werror SRCDIR=src DATADIR=data -- cgit v1.2.3 From add5dd1b9a9ba8c58cdc6ca0bb62d287acb56278 Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Thu, 4 Sep 2014 19:40:27 +0200 Subject: Remove warnings --- Makefile | 4 ++-- src/inlines.c | 17 +++++++++++++++-- src/scanners.re | 41 ++++++++++++++++++++++++----------------- 3 files changed, 41 insertions(+), 21 deletions(-) (limited to 'Makefile') diff --git a/Makefile b/Makefile index ed4ddd5..0d2eb8b 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ -CFLAGS=-g -O3 -Wall -Wextra -Wno-unused-variable -std=c99 -Isrc $(OPTFLAGS) -LDFLAGS=-g -O3 -Wall -Wno-unused-variable # -Werror +CFLAGS=-g -O3 -Wall -Wextra -std=c99 -Isrc $(OPTFLAGS) +LDFLAGS=-g -O3 -Wall -Werror SRCDIR=src DATADIR=data diff --git a/src/inlines.c b/src/inlines.c index 6bb89da..5e0f3e5 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -108,13 +108,26 @@ extern void add_reference(reference** refmap, reference* ref) } } +static unsigned char *bufdup(const unsigned char *buf) +{ + unsigned char *new = NULL; + + if (!buf) { + int len = strlen((char *)buf); + new = malloc(len + 1); + memcpy(new, buf, len + 1); + } + + return new; +} + inline static node_inl* make_link_from_reference(node_inl* label, reference *ref) { node_inl* e = (node_inl*) malloc(sizeof(node_inl)); e->tag = INL_LINK; e->content.linkable.label = label; - e->content.linkable.url = strdup(ref->url); - e->content.linkable.title = ref->title ? 
strdup(ref->title) : NULL; + e->content.linkable.url = bufdup(ref->url); + e->content.linkable.title = bufdup(ref->title); e->next = NULL; return e; } diff --git a/src/scanners.re b/src/scanners.re index 71103f6..28aba9d 100644 --- a/src/scanners.re +++ b/src/scanners.re @@ -1,9 +1,5 @@ #include -#define SCAN_DATA \ - const unsigned char *marker = NULL; \ - const unsigned char *start = p; \ - /*!re2c re2c:define:YYCTYPE = "unsigned char"; re2c:define:YYCURSOR = p; @@ -61,7 +57,8 @@ // Try to match URI autolink after first <, returning number of chars matched. extern int _scan_autolink_uri(const unsigned char *p) { - SCAN_DATA; + const unsigned char *marker = NULL; + const unsigned char *start = p; /*!re2c scheme [:]([^\x00-\x20<>\\]|escaped_char)*[>] { return (p - start); } .? { return 0; } @@ -71,7 +68,8 @@ extern int _scan_autolink_uri(const unsigned char *p) // Try to match email autolink after first <, returning num of chars matched. extern int _scan_autolink_email(const unsigned char *p) { - SCAN_DATA; + const unsigned char *marker = NULL; + const unsigned char *start = p; /*!re2c [a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+ [@] @@ -85,7 +83,8 @@ extern int _scan_autolink_email(const unsigned char *p) // Try to match an HTML tag after first <, returning num of chars matched. extern int _scan_html_tag(const unsigned char *p) { - SCAN_DATA; + const unsigned char *marker = NULL; + const unsigned char *start = p; /*!re2c htmltag { return (p - start); } .? { return 0; } @@ -96,7 +95,8 @@ extern int _scan_html_tag(const unsigned char *p) // returning num of chars matched. extern int _scan_html_block_tag(const unsigned char *p) { - SCAN_DATA; + const unsigned char *marker = NULL; + const unsigned char *start = p; /*!re2c [<] [/] blocktagname (spacechar | [>]) { return (p - start); } [<] blocktagname (spacechar | [/>]) { return (p - start); } @@ -111,7 +111,8 @@ extern int _scan_html_block_tag(const unsigned char *p) // Newlines aren't ever allowed. 
extern int _scan_link_url(const unsigned char *p) { - SCAN_DATA; + const unsigned char *marker = NULL; + const unsigned char *start = p; /*!re2c [ \n]* [<] ([^<>\n\\\x00] | escaped_char | [\\])* [>] { return (p - start); } [ \n]* (reg_char+ | escaped_char | in_parens_nosp)* { return (p - start); } @@ -124,7 +125,8 @@ extern int _scan_link_url(const unsigned char *p) // level of internal nesting (quotes within quotes). extern int _scan_link_title(const unsigned char *p) { - SCAN_DATA; + const unsigned char *marker = NULL; + const unsigned char *start = p; /*!re2c ["] (escaped_char|[^"\x00])* ["] { return (p - start); } ['] (escaped_char|[^'\x00])* ['] { return (p - start); } @@ -136,7 +138,7 @@ extern int _scan_link_title(const unsigned char *p) // Match space characters, including newlines. extern int _scan_spacechars(const unsigned char *p) { - SCAN_DATA; + const unsigned char *start = p; \ /*!re2c [ \t\n]* { return (p - start); } . { return 0; } @@ -146,7 +148,8 @@ extern int _scan_spacechars(const unsigned char *p) // Match ATX header start. extern int _scan_atx_header_start(const unsigned char *p) { - SCAN_DATA; + const unsigned char *marker = NULL; + const unsigned char *start = p; /*!re2c [#]{1,6} ([ ]+|[\n]) { return (p - start); } .? { return 0; } @@ -157,7 +160,7 @@ extern int _scan_atx_header_start(const unsigned char *p) // 2 for level-2, 0 for no match. extern int _scan_setext_header_line(const unsigned char *p) { - SCAN_DATA; + const unsigned char *marker = NULL; /*!re2c [=]+ [ ]* [\n] { return 1; } [-]+ [ ]* [\n] { return 2; } @@ -170,7 +173,8 @@ extern int _scan_setext_header_line(const unsigned char *p) // spaces between the hyphens or asterisks." 
extern int _scan_hrule(const unsigned char *p) { - SCAN_DATA; + const unsigned char *marker = NULL; + const unsigned char *start = p; /*!re2c ([*][ ]*){3,} [ \t]* [\n] { return (p - start); } ([_][ ]*){3,} [ \t]* [\n] { return (p - start); } @@ -182,7 +186,8 @@ extern int _scan_hrule(const unsigned char *p) // Scan an opening code fence. extern int _scan_open_code_fence(const unsigned char *p) { - SCAN_DATA; + const unsigned char *marker = NULL; + const unsigned char *start = p; /*!re2c [`]{3,} / [^`\n\x00]*[\n] { return (p - start); } [~]{3,} / [^~\n\x00]*[\n] { return (p - start); } @@ -193,7 +198,8 @@ extern int _scan_open_code_fence(const unsigned char *p) // Scan a closing code fence with length at least len. extern int _scan_close_code_fence(const unsigned char *p) { - SCAN_DATA; + const unsigned char *marker = NULL; + const unsigned char *start = p; /*!re2c ([`]{3,} | [~]{3,}) / spacechar* [\n] { return (p - start); } .? { return 0; } @@ -204,7 +210,8 @@ extern int _scan_close_code_fence(const unsigned char *p) // Returns number of chars matched. 
extern int _scan_entity(const unsigned char *p) { - SCAN_DATA; + const unsigned char *marker = NULL; + const unsigned char *start = p; /*!re2c [&] ([#] ([Xx][A-Fa-f0-9]{1,8}|[0-9]{1,8}) |[A-Za-z][A-Za-z0-9]{1,31} ) [;] { return (p - start); } -- cgit v1.2.3 From 61e3e606e64221eaa5cf3d83dc598d5a42818d10 Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Sat, 6 Sep 2014 20:48:05 +0200 Subject: UTF8-aware detabbing and entity handling --- Makefile | 13 ++++++----- src/blocks.c | 35 ++++++------------------------ src/html/houdini.h | 2 ++ src/html/html.c | 1 - src/inlines.c | 63 ++++++++++++++++++++++-------------------------------- src/print.c | 5 ----- src/stmd.h | 3 --- src/utf8.c | 59 ++++++++++++++++++++++++++++++++++++++++++++------ 8 files changed, 95 insertions(+), 86 deletions(-) (limited to 'Makefile') diff --git a/Makefile b/Makefile index 0d2eb8b..b5e487d 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ -CFLAGS=-g -O3 -Wall -Wextra -std=c99 -Isrc $(OPTFLAGS) -LDFLAGS=-g -O3 -Wall -Werror +CFLAGS=-g -pg -O3 -Wall -Wextra -std=c99 -Isrc $(OPTFLAGS) +LDFLAGS=-g -pg -O3 -Wall -Werror SRCDIR=src DATADIR=data @@ -41,11 +41,11 @@ testjs: spec.txt benchjs: node js/bench.js ${BENCHINP} -HTML_OBJ=$(SRCDIR)/html/html.o $(SRCDIR)/html/houdini_href_e.o $(SRCDIR)/html/houdini_html_e.o +HTML_OBJ=$(SRCDIR)/html/html.o $(SRCDIR)/html/houdini_href_e.o $(SRCDIR)/html/houdini_html_e.o $(SRCDIR)/html/houdini_html_u.o STMD_OBJ=$(SRCDIR)/inlines.o $(SRCDIR)/buffer.o $(SRCDIR)/blocks.o $(SRCDIR)/scanners.c $(SRCDIR)/print.o $(SRCDIR)/utf8.o -$(PROG): $(SRCDIR)/main.c $(HTML_OBJ) $(STMD_OBJ) - $(CC) $(LDFLAGS) -o $@ $^ +$(PROG): $(SRCDIR)/html/html_unescape.h $(SRCDIR)/case_fold_switch.inc $(HTML_OBJ) $(STMD_OBJ) $(SRCDIR)/main.c + $(CC) $(LDFLAGS) -o $@ $(HTML_OBJ) $(STMD_OBJ) $(SRCDIR)/main.c $(SRCDIR)/scanners.c: $(SRCDIR)/scanners.re re2c --case-insensitive -bis $< > $@ || (rm $@ && false) @@ -53,6 +53,9 @@ $(SRCDIR)/scanners.c: $(SRCDIR)/scanners.re 
$(SRCDIR)/case_fold_switch.inc: $(DATADIR)/CaseFolding-3.2.0.txt perl mkcasefold.pl < $< > $@ +$(SRCDIR)/html/html_unescape.h: $(SRCDIR)/html/html_unescape.gperf + gperf -I -t -N find_entity -H hash_entity -K entity -C -l --null-strings -m5 $< > $@ + .PHONY: leakcheck clean fuzztest dingus upload dingus: diff --git a/src/blocks.c b/src/blocks.c index f671b5e..8c7d49c 100644 --- a/src/blocks.c +++ b/src/blocks.c @@ -5,6 +5,8 @@ #include #include "stmd.h" +#include "utf8.h" +#include "html/houdini.h" #include "scanners.h" #include "uthash.h" @@ -184,7 +186,7 @@ static void finalize(node_block* b, int line_number) firstlinelen = strbuf_strchr(&b->string_content, '\n', 0); strbuf_init(&b->attributes.fenced_code_data.info, 0); - strbuf_set( + houdini_unescape_html_f( &b->attributes.fenced_code_data.info, b->string_content.ptr, firstlinelen @@ -369,31 +371,6 @@ static int lists_match(struct ListData list_data, list_data.bullet_char == item_data.bullet_char); } -static void expand_tabs(strbuf *ob, const unsigned char *line, size_t size) -{ - size_t i = 0, tab = 0; - - while (i < size) { - size_t org = i; - - while (i < size && line[i] != '\t') { - i++; tab++; - } - - if (i > org) - strbuf_put(ob, line + org, i - org); - - if (i >= size) - break; - - do { - strbuf_putc(ob, ' '); tab++; - } while (tab % 4); - - i++; - } -} - static node_block *finalize_document(node_block *document, int linenum) { while (document != document->top) { @@ -415,7 +392,7 @@ extern node_block *stmd_parse_file(FILE *f) node_block *document = make_document(); while (fgets((char *)buffer, sizeof(buffer), f)) { - expand_tabs(&line, buffer, strlen((char *)buffer)); + utf8proc_detab(&line, buffer, strlen((char *)buffer)); incorporate_line(&line, linenum, &document); strbuf_clear(&line); linenum++; @@ -436,10 +413,10 @@ extern node_block *stmd_parse_document(const unsigned char *buffer, size_t len) const unsigned char *eol = memchr(buffer, '\n', end - buffer); if (!eol) { - expand_tabs(&line, buffer, 
end - buffer); + utf8proc_detab(&line, buffer, end - buffer); buffer = end; } else { - expand_tabs(&line, buffer, (eol - buffer) + 1); + utf8proc_detab(&line, buffer, (eol - buffer) + 1); buffer += (eol - buffer) + 1; } diff --git a/src/html/houdini.h b/src/html/houdini.h index 1e54d20..5fd690d 100644 --- a/src/html/houdini.h +++ b/src/html/houdini.h @@ -25,9 +25,11 @@ extern "C" { #define HOUDINI_ESCAPED_SIZE(x) (((x) * 12) / 10) #define HOUDINI_UNESCAPED_SIZE(x) (x) +extern size_t houdini_unescape_ent(strbuf *ob, const uint8_t *src, size_t size); extern int houdini_escape_html(strbuf *ob, const uint8_t *src, size_t size); extern int houdini_escape_html0(strbuf *ob, const uint8_t *src, size_t size, int secure); extern int houdini_unescape_html(strbuf *ob, const uint8_t *src, size_t size); +extern void houdini_unescape_html_f(strbuf *ob, const uint8_t *src, size_t size); extern int houdini_escape_xml(strbuf *ob, const uint8_t *src, size_t size); extern int houdini_escape_uri(strbuf *ob, const uint8_t *src, size_t size); extern int houdini_escape_url(strbuf *ob, const uint8_t *src, size_t size); diff --git a/src/html/html.c b/src/html/html.c index 758ec80..595dfcd 100644 --- a/src/html/html.c +++ b/src/html/html.c @@ -166,7 +166,6 @@ void inlines_to_html(strbuf *html, node_inl* ils) break; case INL_RAW_HTML: - case INL_ENTITY: strbuf_put(html, ils->content.literal.data, ils->content.literal.len); diff --git a/src/inlines.c b/src/inlines.c index 6b17027..7b27150 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -5,6 +5,8 @@ #include #include "stmd.h" +#include "html/houdini.h" +#include "utf8.h" #include "uthash.h" #include "scanners.h" @@ -176,7 +178,6 @@ inline static node_inl* make_simple(int t) #define make_str(s) make_literal(INL_STRING, s) #define make_code(s) make_literal(INL_CODE, s) #define make_raw_html(s) make_literal(INL_RAW_HTML, s) -#define make_entity(s) make_literal(INL_ENTITY, s) #define make_linebreak() make_simple(INL_LINEBREAK) #define 
make_softbreak() make_simple(INL_SOFTBREAK) #define make_emph(contents) make_inlines(INL_EMPH, contents) @@ -191,7 +192,6 @@ extern void free_inlines(node_inl* e) case INL_STRING: case INL_RAW_HTML: case INL_CODE: - case INL_ENTITY: chunk_free(&e->content.literal); break; case INL_LINEBREAK: @@ -540,45 +540,34 @@ static node_inl* handle_backslash(subject *subj) // Assumes the subject has an '&' character at the current position. static node_inl* handle_entity(subject* subj) { - int match; - node_inl *result; - match = scan_entity(&subj->input, subj->pos); - if (match) { - result = make_entity(chunk_dup(&subj->input, subj->pos, match)); - subj->pos += match; - } else { - advance(subj); - result = make_str(chunk_literal("&")); - } - return result; + strbuf ent = GH_BUF_INIT; + size_t len; + + advance(subj); + + len = houdini_unescape_ent(&ent, + subj->input.data + subj->pos, + subj->input.len - subj->pos + ); + + if (len == 0) + return make_str(chunk_literal("&")); + + subj->pos += len; + return make_str(chunk_buf_detach(&ent)); } // Like make_str, but parses entities. // Returns an inline sequence consisting of str and entity elements. static node_inl *make_str_with_entities(chunk *content) { - node_inl *result = NULL; - node_inl *new; - int searchpos; - char c; - subject subj; - - subject_from_chunk(&subj, content, NULL); + strbuf unescaped = GH_BUF_INIT; - while ((c = peek_char(&subj))) { - switch (c) { - case '&': - new = handle_entity(&subj); - break; - default: - searchpos = chunk_strchr(&subj.input, '&', subj.pos); - new = make_str(chunk_dup(&subj.input, subj.pos, searchpos - subj.pos)); - subj.pos = searchpos; - } - result = append_inlines(result, new); + if (houdini_unescape_html(&unescaped, content->data, (size_t)content->len)) { + return make_str(chunk_buf_detach(&unescaped)); + } else { + return make_str(*content); } - - return result; } // Destructively unescape a string: remove backslashes before punctuation chars. 
@@ -611,9 +600,9 @@ static unsigned char *clean_url(chunk *url, int is_email) strbuf_puts(&buf, "mailto:"); if (url->data[0] == '<' && url->data[url->len - 1] == '>') { - strbuf_put(&buf, url->data + 1, url->len - 2); + houdini_unescape_html_f(&buf, url->data + 1, url->len - 2); } else { - strbuf_put(&buf, url->data, url->len); + houdini_unescape_html_f(&buf, url->data, url->len); } unescape_buffer(&buf); @@ -636,9 +625,9 @@ static unsigned char *clean_title(chunk *title) if ((first == '\'' && last == '\'') || (first == '(' && last == ')') || (first == '"' && last == '"')) { - strbuf_set(&buf, title->data + 1, title->len - 2); + houdini_unescape_html_f(&buf, title->data + 1, title->len - 2); } else { - strbuf_set(&buf, title->data, title->len); + houdini_unescape_html_f(&buf, title->data, title->len); } unescape_buffer(&buf); diff --git a/src/print.c b/src/print.c index 0ff86fa..9240dac 100644 --- a/src/print.c +++ b/src/print.c @@ -145,11 +145,6 @@ extern void print_inlines(node_inl* ils, int indent) print_str(ils->content.literal.data, ils->content.literal.len); putchar('\n'); break; - case INL_ENTITY: - printf("entity "); - print_str(ils->content.literal.data, ils->content.literal.len); - putchar('\n'); - break; case INL_LINK: case INL_IMAGE: printf("%s url=", ils->tag == INL_LINK ? 
"link" : "image"); diff --git a/src/stmd.h b/src/stmd.h index be65371..c80eeda 100644 --- a/src/stmd.h +++ b/src/stmd.h @@ -17,7 +17,6 @@ struct node_inl { INL_LINEBREAK, INL_CODE, INL_RAW_HTML, - INL_ENTITY, INL_EMPH, INL_STRONG, INL_LINK, @@ -133,6 +132,4 @@ void print_blocks(node_block* blk, int indent); void blocks_to_html(strbuf *html, node_block *b, bool tight); void inlines_to_html(strbuf *html, node_inl *b); -void utf8proc_case_fold(strbuf *dest, const unsigned char *str, int len); - #endif diff --git a/src/utf8.c b/src/utf8.c index cebd872..12d7ba5 100644 --- a/src/utf8.c +++ b/src/utf8.c @@ -3,7 +3,7 @@ #include #include -#include "stmd.h" +#include "utf8.h" static const int8_t utf8proc_utf8class[256] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -23,6 +23,12 @@ static const int8_t utf8proc_utf8class[256] = { 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0 }; +static void encode_unknown(strbuf *buf) +{ + static const unsigned char repl[] = {239, 191, 189}; + strbuf_put(buf, repl, 3); +} + ssize_t utf8proc_charlen(const uint8_t *str, ssize_t str_len) { ssize_t length, i; @@ -46,6 +52,46 @@ ssize_t utf8proc_charlen(const uint8_t *str, ssize_t str_len) return length; } +void utf8proc_detab(strbuf *ob, const unsigned char *line, size_t size) +{ + static const unsigned char whitespace[] = " "; + + size_t i = 0, tab = 0; + + while (i < size) { + size_t org = i; + + while (i < size && line[i] != '\t' && line[i] <= 0x80) { + i++; tab++; + } + + if (i > org) + strbuf_put(ob, line + org, i - org); + + if (i >= size) + break; + + if (line[i] == '\t') { + int numspaces = 4 - (tab % 4); + strbuf_put(ob, whitespace, numspaces); + i += 1; + tab += numspaces; + } else { + ssize_t charlen = utf8proc_charlen(line + i, size - i); + + if (charlen < 0) { + encode_unknown(ob); + i++; + } else { + strbuf_put(ob, line + i, charlen); + i += charlen; + } + + tab += 1; + } + } +} + ssize_t utf8proc_iterate(const uint8_t *str, 
ssize_t str_len, int32_t *dst) { ssize_t length; @@ -89,9 +135,9 @@ void utf8proc_encode_char(int32_t uc, strbuf *buf) unsigned char dst[4]; int len = 0; - if (uc < 0x00) { - assert(false); - } else if (uc < 0x80) { + assert(uc >= 0); + + if (uc < 0x80) { dst[0] = uc; len = 1; } else if (uc < 0x800) { @@ -116,7 +162,8 @@ void utf8proc_encode_char(int32_t uc, strbuf *buf) dst[3] = 0x80 + (uc & 0x3F); len = 4; } else { - assert(false); + encode_unknown(buf); + return; } strbuf_put(buf, dst, len); @@ -133,7 +180,7 @@ void utf8proc_case_fold(strbuf *dest, const unsigned char *str, int len) ssize_t char_len = utf8proc_iterate(str, len, &c); if (char_len < 0) { - bufpush(0xFFFD); + encode_unknown(dest); continue; } -- cgit v1.2.3 From 9d86d2f32303ae0048f6a5daa552bacceb9b12ea Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Tue, 9 Sep 2014 04:00:36 +0200 Subject: Update the spec with better entity handling --- Makefile | 4 ++-- spec.txt | 22 ++++++++++++---------- src/html/houdini_html_u.c | 2 +- 3 files changed, 15 insertions(+), 13 deletions(-) (limited to 'Makefile') diff --git a/Makefile b/Makefile index b5e487d..5d13272 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ -CFLAGS=-g -pg -O3 -Wall -Wextra -std=c99 -Isrc $(OPTFLAGS) -LDFLAGS=-g -pg -O3 -Wall -Werror +CFLAGS=-g -O3 -Wall -Wextra -std=c99 -Isrc $(OPTFLAGS) +LDFLAGS=-g -O3 -Wall -Werror SRCDIR=src DATADIR=data diff --git a/spec.txt b/spec.txt index ebd6d98..112dccc 100644 --- a/spec.txt +++ b/spec.txt @@ -3762,20 +3762,20 @@ as the "unknown codepoint" character (`0xFFFD`) [Hexadecimal entities](#hexadecimal-entities) consist of `&#` + either `X` or `x` + a string of 1-8 hexadecimal digits -+ `;`. ++ `;`. They will also be parsed and turned into their corresponding UTF8 values in the AST. . - " ആ ಫ +" ആ ಫ . -

     " ആ ಫ

    +

    " ആ ಫ

    . Here are some nonentities: . -  &x; &#; &#x; � &ThisIsWayTooLongToBeAnEntityIsntIt; &hi?; +  &x; &#; &#x; &ThisIsWayTooLongToBeAnEntityIsntIt; &hi?; . -

    &nbsp &x; &#; &#x; &#123456789; &ThisIsWayTooLongToBeAnEntityIsntIt; &hi?;

    +

    &nbsp &x; &#; &#x; &ThisIsWayTooLongToBeAnEntityIsntIt; &hi?;

    . Although HTML5 does accept some entities without a trailing semicolon @@ -3808,7 +3808,7 @@ code blocks, including raw HTML, URLs, [link titles](#link-title), and . [foo](/föö "föö") . -

    foo

    +

    foo

    . . @@ -3816,7 +3816,7 @@ code blocks, including raw HTML, URLs, [link titles](#link-title), and [foo]: /föö "föö" . -

    foo

    +

    foo

    . . @@ -3824,7 +3824,7 @@ code blocks, including raw HTML, URLs, [link titles](#link-title), and foo ``` . -
    foo
    +
    foo
     
    . @@ -4817,12 +4817,14 @@ in Markdown:

    link

    . -URL-escaping and entities should be left alone inside the destination: +URL-escaping should be left alone inside the destination, as all URL-escaped characters +are also valid URL characters. HTML entities in the destination will be parsed into their UTF8 +codepoints, as usual, and optionally URL-escaped when written as HTML. . [link](foo%20bä) . -

    link

    +

    link

    . Note that, because titles can often be parsed as destinations, diff --git a/src/html/houdini_html_u.c b/src/html/houdini_html_u.c index 762f980..b8e2d8d 100644 --- a/src/html/houdini_html_u.c +++ b/src/html/houdini_html_u.c @@ -24,7 +24,7 @@ houdini_unescape_ent(strbuf *ob, const uint8_t *src, size_t size) codepoint = (codepoint * 16) + ((src[i] | 32) % 39 - 9); } - if (i < size && src[i] == ';') { + if (i < size && src[i] == ';' && codepoint) { utf8proc_encode_char(codepoint, ob); return i + 1; } -- cgit v1.2.3 From 94a79a605f3e76a43f1f87a5044f6761b99e5ca5 Mon Sep 17 00:00:00 2001 From: Vicent Marti Date: Wed, 10 Sep 2014 18:33:27 +0200 Subject: Cleanup reference implementation --- Makefile | 2 +- src/blocks.c | 16 ++--- src/buffer.c | 43 ++++++++++++++ src/buffer.h | 2 + src/inlines.c | 176 +++++++------------------------------------------------ src/references.c | 109 ++++++++++++++++++++++++++++++++++ src/references.h | 27 +++++++++ src/stmd.h | 26 +++----- src/utf8.c | 10 ++-- src/utf8.h | 5 +- 10 files changed, 225 insertions(+), 191 deletions(-) create mode 100644 src/references.c create mode 100644 src/references.h (limited to 'Makefile') diff --git a/Makefile b/Makefile index 5d13272..11e2141 100644 --- a/Makefile +++ b/Makefile @@ -42,7 +42,7 @@ benchjs: node js/bench.js ${BENCHINP} HTML_OBJ=$(SRCDIR)/html/html.o $(SRCDIR)/html/houdini_href_e.o $(SRCDIR)/html/houdini_html_e.o $(SRCDIR)/html/houdini_html_u.o -STMD_OBJ=$(SRCDIR)/inlines.o $(SRCDIR)/buffer.o $(SRCDIR)/blocks.o $(SRCDIR)/scanners.c $(SRCDIR)/print.o $(SRCDIR)/utf8.o +STMD_OBJ=$(SRCDIR)/inlines.o $(SRCDIR)/buffer.o $(SRCDIR)/blocks.o $(SRCDIR)/scanners.c $(SRCDIR)/print.o $(SRCDIR)/utf8.o $(SRCDIR)/references.c $(PROG): $(SRCDIR)/html/html_unescape.h $(SRCDIR)/case_fold_switch.inc $(HTML_OBJ) $(STMD_OBJ) $(SRCDIR)/main.c $(CC) $(LDFLAGS) -o $@ $(HTML_OBJ) $(STMD_OBJ) $(SRCDIR)/main.c diff --git a/src/blocks.c b/src/blocks.c index 72b2dc2..30a8284 100644 --- a/src/blocks.c +++ 
b/src/blocks.c @@ -8,7 +8,6 @@ #include "utf8.h" #include "html/houdini.h" #include "scanners.h" -#include "uthash.h" #define peek_at(i, n) (i)->data[n] @@ -36,12 +35,7 @@ static node_block* make_block(int tag, int start_line, int start_column) extern node_block* make_document() { node_block *e = make_block(BLOCK_DOCUMENT, 1, 1); - reference *map = NULL; - reference ** refmap; - - refmap = (reference**) malloc(sizeof(reference*)); - *refmap = map; - e->as.document.refmap = refmap; + e->as.document.refmap = reference_map_new(); e->top = e; return e; @@ -164,7 +158,7 @@ static void finalize(node_block* b, int line_number) case BLOCK_PARAGRAPH: pos = 0; while (strbuf_at(&b->string_content, 0) == '[' && - (pos = parse_reference(&b->string_content, b->top->as.document.refmap))) { + (pos = parse_reference_inline(&b->string_content, b->top->as.document.refmap))) { strbuf_drop(&b->string_content, pos); } @@ -192,7 +186,7 @@ static void finalize(node_block* b, int line_number) strbuf_drop(&b->string_content, firstlinelen + 1); strbuf_trim(&b->as.code.info); - unescape_buffer(&b->as.code.info); + strbuf_unescape(&b->as.code.info); break; case BLOCK_LIST: // determine tight/loose status @@ -268,7 +262,7 @@ extern void free_blocks(node_block* e) if (e->tag == BLOCK_FENCED_CODE) { strbuf_free(&e->as.code.info); } else if (e->tag == BLOCK_DOCUMENT) { - free_reference_map(e->as.document.refmap); + reference_map_free(e->as.document.refmap); } free_blocks(e->children); free(e); @@ -278,7 +272,7 @@ extern void free_blocks(node_block* e) // Walk through node_block and all children, recursively, parsing // string content into inline content where appropriate. 
-void process_inlines(node_block* cur, reference** refmap) +void process_inlines(node_block* cur, reference_map *refmap) { switch (cur->tag) { case BLOCK_PARAGRAPH: diff --git a/src/buffer.c b/src/buffer.c index 90c2186..cdf8ca0 100644 --- a/src/buffer.c +++ b/src/buffer.c @@ -308,3 +308,46 @@ void strbuf_trim(strbuf *buf) buf->ptr[buf->size] = '\0'; } + +// Destructively modify string, collapsing consecutive +// space and newline characters into a single space. +void strbuf_normalize_whitespace(strbuf *s) +{ + bool last_char_was_space = false; + int r, w; + + for (r = 0, w = 0; r < s->size; ++r) { + switch (s->ptr[r]) { + case ' ': + case '\n': + if (last_char_was_space) + break; + + s->ptr[w++] = ' '; + last_char_was_space = true; + break; + + default: + s->ptr[w++] = s->ptr[r]; + last_char_was_space = false; + } + } + + strbuf_truncate(s, w); +} + +// Destructively unescape a string: remove backslashes before punctuation chars. +extern void strbuf_unescape(strbuf *buf) +{ + int r, w; + + for (r = 0, w = 0; r < buf->size; ++r) { + if (buf->ptr[r] == '\\' && ispunct(buf->ptr[r + 1])) + continue; + + buf->ptr[w++] = buf->ptr[r]; + } + + strbuf_truncate(buf, w); +} + diff --git a/src/buffer.h b/src/buffer.h index 6f45cbb..1bc1eee 100644 --- a/src/buffer.h +++ b/src/buffer.h @@ -108,5 +108,7 @@ int strbuf_strrchr(const strbuf *buf, int c, int pos); void strbuf_drop(strbuf *buf, int n); void strbuf_truncate(strbuf *buf, int len); void strbuf_trim(strbuf *buf); +void strbuf_normalize_whitespace(strbuf *s); +void strbuf_unescape(strbuf *s); #endif diff --git a/src/inlines.c b/src/inlines.c index aa0e13e..3040f09 100644 --- a/src/inlines.c +++ b/src/inlines.c @@ -7,110 +7,23 @@ #include "stmd.h" #include "html/houdini.h" #include "utf8.h" -#include "uthash.h" #include "scanners.h" typedef struct Subject { chunk input; int pos; - int label_nestlevel; - reference** reference_map; + int label_nestlevel; + reference_map *refmap; } subject; -reference* 
lookup_reference(reference** refmap, chunk *label); -reference* make_reference(chunk *label, chunk *url, chunk *title); - -static unsigned char *clean_url(chunk *url); -static unsigned char *clean_title(chunk *title); -static unsigned char *clean_autolink(chunk *url, int is_email); - -inline static void chunk_free(chunk *c); -inline static void chunk_trim(chunk *c); - -inline static chunk chunk_literal(const char *data); -inline static chunk chunk_buf_detach(strbuf *buf); -inline static chunk chunk_dup(const chunk *ch, int pos, int len); - -static node_inl *parse_chunk_inlines(chunk *chunk, reference** refmap); +static node_inl *parse_chunk_inlines(chunk *chunk, reference_map *refmap); static node_inl *parse_inlines_while(subject* subj, int (*f)(subject*)); static int parse_inline(subject* subj, node_inl ** last); -static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap); -static void subject_from_buf(subject *e, strbuf *buffer, reference** refmap); +static void subject_from_chunk(subject *e, chunk *chunk, reference_map *refmap); +static void subject_from_buf(subject *e, strbuf *buffer, reference_map *refmap); static int subject_find_special_char(subject *subj); -static void normalize_whitespace(strbuf *s); - -extern void free_reference(reference *ref) { - free(ref->label); - free(ref->url); - free(ref->title); - free(ref); -} - -extern void free_reference_map(reference **refmap) { - /* free the hash table contents */ - reference *s; - reference *tmp; - if (refmap != NULL) { - HASH_ITER(hh, *refmap, s, tmp) { - HASH_DEL(*refmap, s); - free_reference(s); - } - free(refmap); - } -} - -// normalize reference: collapse internal whitespace to single space, -// remove leading/trailing whitespace, case fold -static unsigned char *normalize_reference(chunk *ref) -{ - strbuf normalized = GH_BUF_INIT; - - utf8proc_case_fold(&normalized, ref->data, ref->len); - strbuf_trim(&normalized); - normalize_whitespace(&normalized); - - return 
strbuf_detach(&normalized); -} - -// Returns reference if refmap contains a reference with matching -// label, otherwise NULL. -extern reference* lookup_reference(reference** refmap, chunk *label) -{ - reference *ref = NULL; - unsigned char *norm = normalize_reference(label); - if (refmap != NULL) { - HASH_FIND_STR(*refmap, (char*)norm, ref); - } - free(norm); - return ref; -} - -extern reference* make_reference(chunk *label, chunk *url, chunk *title) -{ - reference *ref; - ref = malloc(sizeof(reference)); - ref->label = normalize_reference(label); - ref->url = clean_url(url); - ref->title = clean_title(title); - return ref; -} - -extern void add_reference(reference** refmap, reference* ref) -{ - reference * t = NULL; - const char *label = (const char *)ref->label; - - HASH_FIND(hh, *refmap, label, strlen(label), t); - - if (t == NULL) { - HASH_ADD_KEYPTR(hh, *refmap, label, strlen(label), ref); - } else { - free_reference(ref); // we free this now since it won't be in the refmap - } -} - static unsigned char *bufdup(const unsigned char *buf) { unsigned char *new = NULL; @@ -236,26 +149,26 @@ inline static node_inl* append_inlines(node_inl* a, node_inl* b) return a; } -static void subject_from_buf(subject *e, strbuf *buffer, reference** refmap) +static void subject_from_buf(subject *e, strbuf *buffer, reference_map *refmap) { e->input.data = buffer->ptr; e->input.len = buffer->size; e->input.alloc = 0; e->pos = 0; e->label_nestlevel = 0; - e->reference_map = refmap; + e->refmap = refmap; chunk_rtrim(&e->input); } -static void subject_from_chunk(subject *e, chunk *chunk, reference** refmap) +static void subject_from_chunk(subject *e, chunk *chunk, reference_map *refmap) { e->input.data = chunk->data; e->input.len = chunk->len; e->input.alloc = 0; e->pos = 0; e->label_nestlevel = 0; - e->reference_map = refmap; + e->refmap = refmap; chunk_rtrim(&e->input); } @@ -325,33 +238,6 @@ static int scan_to_closing_backticks(subject* subj, int openticklength) return 
(subj->pos); } -// Destructively modify string, collapsing consecutive -// space and newline characters into a single space. -static void normalize_whitespace(strbuf *s) -{ - bool last_char_was_space = false; - int r, w; - - for (r = 0, w = 0; r < s->size; ++r) { - switch (s->ptr[r]) { - case ' ': - case '\n': - if (last_char_was_space) - break; - - s->ptr[w++] = ' '; - last_char_was_space = true; - break; - - default: - s->ptr[w++] = s->ptr[r]; - last_char_was_space = false; - } - } - - strbuf_truncate(s, w); -} - // Parse backtick code section or raw backticks, return an inline. // Assumes that the subject has a backtick at the current position. static node_inl* handle_backticks(subject *subj) @@ -368,7 +254,7 @@ static node_inl* handle_backticks(subject *subj) strbuf_set(&buf, subj->input.data + startpos, endpos - startpos - openticks.len); strbuf_trim(&buf); - normalize_whitespace(&buf); + strbuf_normalize_whitespace(&buf); return make_code(chunk_buf_detach(&buf)); } @@ -575,24 +461,9 @@ static node_inl *make_str_with_entities(chunk *content) } } -// Destructively unescape a string: remove backslashes before punctuation chars. -extern void unescape_buffer(strbuf *buf) -{ - int r, w; - - for (r = 0, w = 0; r < buf->size; ++r) { - if (buf->ptr[r] == '\\' && ispunct(buf->ptr[r + 1])) - continue; - - buf->ptr[w++] = buf->ptr[r]; - } - - strbuf_truncate(buf, w); -} - // Clean a URL: remove surrounding whitespace and surrounding <>, // and remove \ that escape punctuation. 
-static unsigned char *clean_url(chunk *url) +unsigned char *clean_url(chunk *url) { strbuf buf = GH_BUF_INIT; @@ -607,11 +478,11 @@ static unsigned char *clean_url(chunk *url) houdini_unescape_html_f(&buf, url->data, url->len); } - unescape_buffer(&buf); + strbuf_unescape(&buf); return strbuf_detach(&buf); } -static unsigned char *clean_autolink(chunk *url, int is_email) +unsigned char *clean_autolink(chunk *url, int is_email) { strbuf buf = GH_BUF_INIT; @@ -628,7 +499,7 @@ static unsigned char *clean_autolink(chunk *url, int is_email) } // Clean a title: remove surrounding quotes and remove \ that escape punctuation. -static unsigned char *clean_title(chunk *title) +unsigned char *clean_title(chunk *title) { strbuf buf = GH_BUF_INIT; unsigned char first, last; @@ -648,7 +519,7 @@ static unsigned char *clean_title(chunk *title) houdini_unescape_html_f(&buf, title->data, title->len); } - unescape_buffer(&buf); + strbuf_unescape(&buf); return strbuf_detach(&buf); } @@ -810,7 +681,7 @@ static node_inl* handle_left_bracket(subject* subj) } else { // if we get here, we matched a label but didn't get further: subj->pos = endlabel; - lab = parse_chunk_inlines(&rawlabel, subj->reference_map); + lab = parse_chunk_inlines(&rawlabel, subj->refmap); result = append_inlines(make_str(chunk_literal("[")), append_inlines(lab, make_str(chunk_literal("]")))); @@ -834,13 +705,13 @@ static node_inl* handle_left_bracket(subject* subj) } // lookup rawlabel in subject->reference_map: - ref = lookup_reference(subj->reference_map, &reflabel); + ref = reference_lookup(subj->refmap, &reflabel); if (ref != NULL) { // found lab = parse_chunk_inlines(&rawlabel, NULL); result = make_ref_link(lab, ref); } else { subj->pos = endlabel; - lab = parse_chunk_inlines(&rawlabel, subj->reference_map); + lab = parse_chunk_inlines(&rawlabel, subj->refmap); result = append_inlines(make_str(chunk_literal("[")), append_inlines(lab, make_str(chunk_literal("]")))); } @@ -887,7 +758,7 @@ extern node_inl* 
parse_inlines_while(subject* subj, int (*f)(subject*)) return result; } -node_inl *parse_chunk_inlines(chunk *chunk, reference** refmap) +node_inl *parse_chunk_inlines(chunk *chunk, reference_map *refmap) { subject subj; subject_from_chunk(&subj, chunk, refmap); @@ -987,7 +858,7 @@ static int parse_inline(subject* subj, node_inl ** last) return 1; } -extern node_inl* parse_inlines(strbuf *input, reference** refmap) +extern node_inl* parse_inlines(strbuf *input, reference_map *refmap) { subject subj; subject_from_buf(&subj, input, refmap); @@ -1009,7 +880,7 @@ void spnl(subject* subj) // Modify refmap if a reference is encountered. // Return 0 if no reference found, otherwise position of subject // after reference is parsed. -extern int parse_reference(strbuf *input, reference** refmap) +int parse_reference_inline(strbuf *input, reference_map *refmap) { subject subj; @@ -1019,7 +890,6 @@ extern int parse_reference(strbuf *input, reference** refmap) int matchlen = 0; int beforetitle; - reference *new = NULL; subject_from_buf(&subj, input, NULL); @@ -1065,9 +935,7 @@ extern int parse_reference(strbuf *input, reference** refmap) return 0; } // insert reference into refmap - new = make_reference(&lab, &url, &title); - add_reference(refmap, new); - + reference_create(refmap, &lab, &url, &title); return subj.pos; } diff --git a/src/references.c b/src/references.c new file mode 100644 index 0000000..ff64b00 --- /dev/null +++ b/src/references.c @@ -0,0 +1,109 @@ +#include "stmd.h" +#include "utf8.h" +#include "references.h" + +static unsigned int +refhash(const unsigned char *link_ref) +{ + unsigned int hash = 0; + + while (*link_ref) + hash = (*link_ref++) + (hash << 6) + (hash << 16) - hash; + + return hash; +} + +// normalize reference: collapse internal whitespace to single space, +// remove leading/trailing whitespace, case fold +static unsigned char *normalize_reference(chunk *ref) +{ + strbuf normalized = GH_BUF_INIT; + + utf8proc_case_fold(&normalized, ref->data, 
ref->len); + strbuf_trim(&normalized); + strbuf_normalize_whitespace(&normalized); + + return strbuf_detach(&normalized); +} + +static void add_reference(reference_map *map, reference* ref) +{ + ref->next = map->table[ref->hash % REFMAP_SIZE]; + map->table[ref->hash % REFMAP_SIZE] = ref; +} + +extern reference *reference_create(reference_map *map, chunk *label, chunk *url, chunk *title) +{ + reference *ref; + ref = malloc(sizeof(reference)); + ref->label = normalize_reference(label); + ref->hash = refhash(ref->label); + ref->url = clean_url(url); + ref->title = clean_title(title); + ref->next = NULL; + + add_reference(map, ref); + + return ref; +} + +// Returns reference if refmap contains a reference with matching +// label, otherwise NULL. +reference* reference_lookup(reference_map *map, chunk *label) +{ + reference *ref = NULL; + unsigned char *norm; + unsigned int hash; + + if (map == NULL) + return NULL; + + norm = normalize_reference(label); + hash = refhash(norm); + ref = map->table[hash % REFMAP_SIZE]; + + while (ref) { + if (ref->label[0] == norm[0] && + !strcmp((char *)ref->label, (char *)norm)) + break; + ref = ref->next; + } + + free(norm); + return ref; +} + +static void reference_free(reference *ref) +{ + free(ref->label); + free(ref->url); + free(ref->title); + free(ref); +} + +void reference_map_free(reference_map *map) +{ + unsigned int i; + + for (i = 0; i < REFMAP_SIZE; ++i) { + reference *ref = map->table[i]; + reference *next; + + while (ref) { + next = ref->next; + reference_free(ref); + ref = next; + } + } + + free(map->table); + free(map); +} + +reference_map *reference_map_new(void) +{ + reference_map *map = malloc(sizeof(reference_map)); + memset(map, 0x0, sizeof(reference_map)); + return map; +} + diff --git a/src/references.h b/src/references.h new file mode 100644 index 0000000..78fffe7 --- /dev/null +++ b/src/references.h @@ -0,0 +1,27 @@ +#ifndef _REFERENCES_H_ +#define _REFERENCES_H_ + +#define REFMAP_SIZE 16 + +struct reference { + 
struct reference *next; + unsigned char *label; + unsigned char *url; + unsigned char *title; + unsigned int hash; +}; + +typedef struct reference reference; + +struct reference_map { + reference *table[REFMAP_SIZE]; +}; + +typedef struct reference_map reference_map; + +reference_map *reference_map_new(void); +void reference_map_free(reference_map *map); +reference* reference_lookup(reference_map *map, chunk *label); +extern reference *reference_create(reference_map *map, chunk *label, chunk *url, chunk *title); + +#endif diff --git a/src/stmd.h b/src/stmd.h index 21a86b0..4e21e6c 100644 --- a/src/stmd.h +++ b/src/stmd.h @@ -5,7 +5,7 @@ #include #include "buffer.h" #include "chunk.h" -#include "uthash.h" +#include "references.h" #define VERSION "0.1" #define CODE_INDENT 4 @@ -36,17 +36,7 @@ struct node_inl { typedef struct node_inl node_inl; -struct reference { - unsigned char *label; - unsigned char *url; - unsigned char *title; - UT_hash_handle hh; // used by uthash -}; - -typedef struct reference reference; - // Types for blocks - struct ListData { enum { bullet, @@ -104,7 +94,7 @@ struct node_block { int level; } header; struct { - reference** refmap; + reference_map *refmap; } document; } as; @@ -114,14 +104,10 @@ struct node_block { typedef struct node_block node_block; -node_inl* parse_inlines(strbuf *input, reference** refmap); +node_inl* parse_inlines(strbuf *input, reference_map *refmap); void free_inlines(node_inl* e); -int parse_reference(strbuf *input, reference** refmap); -void free_reference(reference *ref); -void free_reference_map(reference **refmap); - -void add_reference(reference** refmap, reference* ref); +int parse_reference_inline(strbuf *input, reference_map *refmap); void unescape_buffer(strbuf *buf); extern node_block* make_document(); @@ -138,4 +124,8 @@ void print_blocks(node_block* blk, int indent); void blocks_to_html(strbuf *html, node_block *b, bool tight); void inlines_to_html(strbuf *html, node_inl *b); +unsigned char 
*clean_url(chunk *url); +unsigned char *clean_autolink(chunk *url, int is_email); +unsigned char *clean_title(chunk *title); + #endif diff --git a/src/utf8.c b/src/utf8.c index 12d7ba5..c65aec6 100644 --- a/src/utf8.c +++ b/src/utf8.c @@ -25,7 +25,7 @@ static const int8_t utf8proc_utf8class[256] = { static void encode_unknown(strbuf *buf) { - static const unsigned char repl[] = {239, 191, 189}; + static const uint8_t repl[] = {239, 191, 189}; strbuf_put(buf, repl, 3); } @@ -52,9 +52,9 @@ ssize_t utf8proc_charlen(const uint8_t *str, ssize_t str_len) return length; } -void utf8proc_detab(strbuf *ob, const unsigned char *line, size_t size) +void utf8proc_detab(strbuf *ob, const uint8_t *line, size_t size) { - static const unsigned char whitespace[] = " "; + static const uint8_t whitespace[] = " "; size_t i = 0, tab = 0; @@ -132,7 +132,7 @@ ssize_t utf8proc_iterate(const uint8_t *str, ssize_t str_len, int32_t *dst) void utf8proc_encode_char(int32_t uc, strbuf *buf) { - unsigned char dst[4]; + uint8_t dst[4]; int len = 0; assert(uc >= 0); @@ -169,7 +169,7 @@ void utf8proc_encode_char(int32_t uc, strbuf *buf) strbuf_put(buf, dst, len); } -void utf8proc_case_fold(strbuf *dest, const unsigned char *str, int len) +void utf8proc_case_fold(strbuf *dest, const uint8_t *str, int len) { int32_t c; diff --git a/src/utf8.h b/src/utf8.h index 1e4e556..9506b75 100644 --- a/src/utf8.h +++ b/src/utf8.h @@ -1,12 +1,13 @@ #ifndef _H_STMD_UTF8_ #define _H_STMD_UTF8_ +#include #include "buffer.h" -void utf8proc_case_fold(strbuf *dest, const unsigned char *str, int len); +void utf8proc_case_fold(strbuf *dest, const uint8_t *str, int len); void utf8proc_encode_char(int32_t uc, strbuf *buf); ssize_t utf8proc_iterate(const uint8_t *str, ssize_t str_len, int32_t *dst); ssize_t utf8proc_charlen(const uint8_t *str, ssize_t str_len); -void utf8proc_detab(strbuf *dest, const unsigned char *line, size_t size); +void utf8proc_detab(strbuf *dest, const uint8_t *line, size_t size); #endif -- cgit 
v1.2.3 From e752b9776d434f63768c50e4c73c533a43529052 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 24 Sep 2014 22:22:51 -0700 Subject: Makefile: Use ?= so variables can be set on command line. --- Makefile | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'Makefile') diff --git a/Makefile b/Makefile index 11e2141..f5f408e 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,9 @@ -CFLAGS=-g -O3 -Wall -Wextra -std=c99 -Isrc $(OPTFLAGS) -LDFLAGS=-g -O3 -Wall -Werror -SRCDIR=src -DATADIR=data +CFLAGS?=-g -O3 -Wall -Wextra -std=c99 -Isrc $(OPTFLAGS) +LDFLAGS?=-g -O3 -Wall -Werror +SRCDIR?=src +DATADIR?=data -PROG=./stmd +PROG?=./stmd .PHONY: all oldtests test spec benchjs testjs all: $(SRCDIR)/case_fold_switch.inc $(PROG) -- cgit v1.2.3 From c006aececef112f61dd44cad43f0596221f29700 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Wed, 24 Sep 2014 22:47:47 -0700 Subject: Suppress 'missing field initializer' warnings from gperf generated header. --- Makefile | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'Makefile') diff --git a/Makefile b/Makefile index f5f408e..671d30d 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -CFLAGS?=-g -O3 -Wall -Wextra -std=c99 -Isrc $(OPTFLAGS) +CFLAGS?=-g -O3 -Wall -Wextra -std=c99 -Isrc -Wno-missing-field-initializers $(OPTFLAGS) LDFLAGS?=-g -O3 -Wall -Werror SRCDIR?=src DATADIR?=data @@ -42,6 +42,7 @@ benchjs: node js/bench.js ${BENCHINP} HTML_OBJ=$(SRCDIR)/html/html.o $(SRCDIR)/html/houdini_href_e.o $(SRCDIR)/html/houdini_html_e.o $(SRCDIR)/html/houdini_html_u.o + STMD_OBJ=$(SRCDIR)/inlines.o $(SRCDIR)/buffer.o $(SRCDIR)/blocks.o $(SRCDIR)/scanners.c $(SRCDIR)/print.o $(SRCDIR)/utf8.o $(SRCDIR)/references.c $(PROG): $(SRCDIR)/html/html_unescape.h $(SRCDIR)/case_fold_switch.inc $(HTML_OBJ) $(STMD_OBJ) $(SRCDIR)/main.c -- cgit v1.2.3 From bc5b7c288d29215c585db254a203889e0dea54e2 Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Tue, 7 Oct 2014 22:35:19 -0700 Subject: Removed 
oldtests. --- Makefile | 8 +- oldtests/Blockquotes/Indents.html | 12 - oldtests/Blockquotes/Indents.markdown | 5 - oldtests/Blockquotes/Nesting.html | 32 - oldtests/Blockquotes/Nesting.markdown | 22 - oldtests/Blockquotes/Separation.html | 39 - oldtests/Blockquotes/Separation.markdown | 29 - oldtests/Code/BlankLines.html | 33 - oldtests/Code/BlankLines.markdown | 28 - oldtests/Code/BlankLinesAtEnd.html | 14 - oldtests/Code/BlankLinesAtEnd.markdown | 14 - oldtests/Code/FenceMatching.html | 8 - oldtests/Code/FenceMatching.markdown | 10 - oldtests/Code/FencedCodeBlocks.html | 24 - oldtests/Code/FencedCodeBlocks.markdown | 35 - oldtests/Code/IndentedCodeBlocks.html | 22 - oldtests/Code/IndentedCodeBlocks.markdown | 22 - oldtests/Code/IndentedFences.html | 20 - oldtests/Code/IndentedFences.markdown | 26 - oldtests/Code/IndentedInLists.html | 22 - oldtests/Code/IndentedInLists.markdown | 17 - oldtests/Code/Inline.html | 13 - oldtests/Code/Inline.markdown | 13 - oldtests/Code/ListBreakAfter.html | 30 - oldtests/Code/ListBreakAfter.markdown | 26 - oldtests/Code/WhiteLines.html | 7 - oldtests/Code/WhiteLines.markdown | 9 - oldtests/Emphasis/Escapes.html | 1 - oldtests/Emphasis/Escapes.markdown | 1 - oldtests/Emphasis/NestedEmphAndStrong.html | 66 -- oldtests/Emphasis/NestedEmphAndStrong.markdown | 69 -- oldtests/Emphasis/Pathological.html | 24 - oldtests/Emphasis/Pathological.markdown | 26 - oldtests/Emphasis/Punctuation.html | 10 - oldtests/Emphasis/Punctuation.markdown | 19 - oldtests/HTML/Blocks.html | 18 - oldtests/HTML/Blocks.markdown | 26 - oldtests/HTML/Inline.html | 8 - oldtests/HTML/Inline.markdown | 8 - oldtests/HTML/UppercaseTags.html | 4 - oldtests/HTML/UppercaseTags.markdown | 5 - oldtests/Headers/ATX.html | 14 - oldtests/Headers/ATX.markdown | 20 - oldtests/Headers/Setext.html | 9 - oldtests/Headers/Setext.markdown | 17 - oldtests/Links/AngleBrackets.html | 3 - oldtests/Links/AngleBrackets.markdown | 7 - oldtests/Links/AutoLinks.html | 7 - 
oldtests/Links/AutoLinks.markdown | 7 - oldtests/Links/BackticksInLinks.html | 1 - oldtests/Links/BackticksInLinks.markdown | 1 - oldtests/Links/CaseInsensitiveReferences.html | 1 - oldtests/Links/CaseInsensitiveReferences.markdown | 3 - oldtests/Links/Entities.html | 2 - oldtests/Links/Entities.markdown | 3 - oldtests/Links/InlineLinks.html | 10 - oldtests/Links/InlineLinks.markdown | 9 - oldtests/Links/ParensInURLs.html | 6 - oldtests/Links/ParensInURLs.markdown | 14 - oldtests/Links/ReferenceLinks.html | 7 - oldtests/Links/ReferenceLinks.markdown | 10 - oldtests/Lists/CodeBlocksInLists.html | 14 - oldtests/Lists/CodeBlocksInLists.markdown | 18 - oldtests/Lists/ConsecutiveLists.html | 20 - oldtests/Lists/ConsecutiveLists.markdown | 10 - oldtests/Lists/EmptyListItem.html | 10 - oldtests/Lists/EmptyListItem.markdown | 7 - oldtests/Lists/InBlockquote.html | 22 - oldtests/Lists/InBlockquote.markdown | 12 - oldtests/Lists/Indents.html | 22 - oldtests/Lists/Indents.markdown | 17 - oldtests/Lists/ListsAndHRs.html | 7 - oldtests/Lists/ListsAndHRs.markdown | 3 - oldtests/Lists/ListsAndSetextHeaders.html | 6 - oldtests/Lists/ListsAndSetextHeaders.markdown | 4 - oldtests/Lists/MultipleBlankLines.html | 56 -- oldtests/Lists/MultipleBlankLines.markdown | 37 - oldtests/Lists/Start.html | 11 - oldtests/Lists/Start.markdown | 7 - oldtests/Lists/Sublists.html | 49 -- oldtests/Lists/Sublists.markdown | 24 - oldtests/Lists/TightAndLoose.html | 49 -- oldtests/Lists/TightAndLoose.markdown | 45 -- oldtests/Lists/TightLooseBlockquote.html | 32 - oldtests/Lists/TightLooseBlockquote.markdown | 25 - oldtests/Lists/TightLooseMore.html | 7 - oldtests/Lists/TightLooseMore.markdown | 4 - oldtests/Lists/TwoBlankLinesEndList.html | 21 - oldtests/Lists/TwoBlankLinesEndList.markdown | 20 - oldtests/Makefile | 55 -- oldtests/Misc/BackslashEscapes.html | 14 - oldtests/Misc/BackslashEscapes.markdown | 19 - oldtests/Misc/Laziness.html | 22 - oldtests/Misc/Laziness.markdown | 14 - 
oldtests/Misc/LineBreaks.html | 11 - oldtests/Misc/LineBreaks.markdown | 18 - oldtests/Misc/Transitions.html | 26 - oldtests/Misc/Transitions.markdown | 20 - oldtests/Original/Amps_and_angle_encoding.html | 9 - oldtests/Original/Amps_and_angle_encoding.markdown | 21 - oldtests/Original/Auto_links.html | 13 - oldtests/Original/Auto_links.markdown | 13 - oldtests/Original/Backslash_escapes.html | 75 -- oldtests/Original/Backslash_escapes.markdown | 120 --- .../Original/Blockquotes_with_code_blocks.html | 12 - .../Original/Blockquotes_with_code_blocks.markdown | 11 - oldtests/Original/Code_Blocks.html | 12 - oldtests/Original/Code_Blocks.markdown | 14 - oldtests/Original/Code_Spans.html | 3 - oldtests/Original/Code_Spans.markdown | 5 - oldtests/Original/Horizontal_rules.html | 39 - oldtests/Original/Horizontal_rules.markdown | 67 -- oldtests/Original/Images.html | 11 - oldtests/Original/Images.markdown | 26 - oldtests/Original/Inline_HTML_Advanced.html | 23 - oldtests/Original/Inline_HTML_Advanced.markdown | 30 - oldtests/Original/Inline_HTML_Simple.html | 45 -- oldtests/Original/Inline_HTML_Simple.markdown | 69 -- oldtests/Original/Inline_HTML_comments.html | 8 - oldtests/Original/Inline_HTML_comments.markdown | 13 - oldtests/Original/Links_inline_style.html | 12 - oldtests/Original/Links_inline_style.markdown | 24 - oldtests/Original/Links_reference_style.html | 28 - oldtests/Original/Links_reference_style.markdown | 71 -- oldtests/Original/Links_shortcut_references.html | 6 - .../Original/Links_shortcut_references.markdown | 20 - oldtests/Original/Literal_quotes_in_titles.html | 2 - .../Original/Literal_quotes_in_titles.markdown | 7 - .../Original/Markdown_Documentation_Basics.html | 242 ------ .../Markdown_Documentation_Basics.markdown | 306 ------- .../Original/Markdown_Documentation_Syntax.html | 708 ---------------- .../Markdown_Documentation_Syntax.markdown | 888 --------------------- oldtests/Original/Nested_blockquotes.html | 7 - 
oldtests/Original/Nested_blockquotes.markdown | 5 - oldtests/Original/Ordered_and_unordered_lists.html | 112 --- .../Original/Ordered_and_unordered_lists.markdown | 131 --- oldtests/Original/README | 15 - oldtests/Original/Strong_and_em_together.html | 4 - oldtests/Original/Strong_and_em_together.markdown | 7 - oldtests/Original/Tabs.html | 19 - oldtests/Original/Tabs.markdown | 21 - oldtests/Original/Tidyness.html | 8 - oldtests/Original/Tidyness.markdown | 5 - oldtests/Tabs/TabConversionUnicode.html | 1 - oldtests/Tabs/TabConversionUnicode.markdown | 1 - 145 files changed, 3 insertions(+), 5020 deletions(-) delete mode 100644 oldtests/Blockquotes/Indents.html delete mode 100644 oldtests/Blockquotes/Indents.markdown delete mode 100644 oldtests/Blockquotes/Nesting.html delete mode 100644 oldtests/Blockquotes/Nesting.markdown delete mode 100644 oldtests/Blockquotes/Separation.html delete mode 100644 oldtests/Blockquotes/Separation.markdown delete mode 100644 oldtests/Code/BlankLines.html delete mode 100644 oldtests/Code/BlankLines.markdown delete mode 100644 oldtests/Code/BlankLinesAtEnd.html delete mode 100644 oldtests/Code/BlankLinesAtEnd.markdown delete mode 100644 oldtests/Code/FenceMatching.html delete mode 100644 oldtests/Code/FenceMatching.markdown delete mode 100644 oldtests/Code/FencedCodeBlocks.html delete mode 100644 oldtests/Code/FencedCodeBlocks.markdown delete mode 100644 oldtests/Code/IndentedCodeBlocks.html delete mode 100644 oldtests/Code/IndentedCodeBlocks.markdown delete mode 100644 oldtests/Code/IndentedFences.html delete mode 100644 oldtests/Code/IndentedFences.markdown delete mode 100644 oldtests/Code/IndentedInLists.html delete mode 100644 oldtests/Code/IndentedInLists.markdown delete mode 100644 oldtests/Code/Inline.html delete mode 100644 oldtests/Code/Inline.markdown delete mode 100644 oldtests/Code/ListBreakAfter.html delete mode 100644 oldtests/Code/ListBreakAfter.markdown delete mode 100644 oldtests/Code/WhiteLines.html delete mode 
100644 oldtests/Code/WhiteLines.markdown delete mode 100644 oldtests/Emphasis/Escapes.html delete mode 100644 oldtests/Emphasis/Escapes.markdown delete mode 100644 oldtests/Emphasis/NestedEmphAndStrong.html delete mode 100644 oldtests/Emphasis/NestedEmphAndStrong.markdown delete mode 100644 oldtests/Emphasis/Pathological.html delete mode 100644 oldtests/Emphasis/Pathological.markdown delete mode 100644 oldtests/Emphasis/Punctuation.html delete mode 100644 oldtests/Emphasis/Punctuation.markdown delete mode 100644 oldtests/HTML/Blocks.html delete mode 100644 oldtests/HTML/Blocks.markdown delete mode 100644 oldtests/HTML/Inline.html delete mode 100644 oldtests/HTML/Inline.markdown delete mode 100644 oldtests/HTML/UppercaseTags.html delete mode 100644 oldtests/HTML/UppercaseTags.markdown delete mode 100644 oldtests/Headers/ATX.html delete mode 100644 oldtests/Headers/ATX.markdown delete mode 100644 oldtests/Headers/Setext.html delete mode 100644 oldtests/Headers/Setext.markdown delete mode 100644 oldtests/Links/AngleBrackets.html delete mode 100644 oldtests/Links/AngleBrackets.markdown delete mode 100644 oldtests/Links/AutoLinks.html delete mode 100644 oldtests/Links/AutoLinks.markdown delete mode 100644 oldtests/Links/BackticksInLinks.html delete mode 100644 oldtests/Links/BackticksInLinks.markdown delete mode 100644 oldtests/Links/CaseInsensitiveReferences.html delete mode 100644 oldtests/Links/CaseInsensitiveReferences.markdown delete mode 100644 oldtests/Links/Entities.html delete mode 100644 oldtests/Links/Entities.markdown delete mode 100644 oldtests/Links/InlineLinks.html delete mode 100644 oldtests/Links/InlineLinks.markdown delete mode 100644 oldtests/Links/ParensInURLs.html delete mode 100644 oldtests/Links/ParensInURLs.markdown delete mode 100644 oldtests/Links/ReferenceLinks.html delete mode 100644 oldtests/Links/ReferenceLinks.markdown delete mode 100644 oldtests/Lists/CodeBlocksInLists.html delete mode 100644 oldtests/Lists/CodeBlocksInLists.markdown 
delete mode 100644 oldtests/Lists/ConsecutiveLists.html delete mode 100644 oldtests/Lists/ConsecutiveLists.markdown delete mode 100644 oldtests/Lists/EmptyListItem.html delete mode 100644 oldtests/Lists/EmptyListItem.markdown delete mode 100644 oldtests/Lists/InBlockquote.html delete mode 100644 oldtests/Lists/InBlockquote.markdown delete mode 100644 oldtests/Lists/Indents.html delete mode 100644 oldtests/Lists/Indents.markdown delete mode 100644 oldtests/Lists/ListsAndHRs.html delete mode 100644 oldtests/Lists/ListsAndHRs.markdown delete mode 100644 oldtests/Lists/ListsAndSetextHeaders.html delete mode 100644 oldtests/Lists/ListsAndSetextHeaders.markdown delete mode 100644 oldtests/Lists/MultipleBlankLines.html delete mode 100644 oldtests/Lists/MultipleBlankLines.markdown delete mode 100644 oldtests/Lists/Start.html delete mode 100644 oldtests/Lists/Start.markdown delete mode 100644 oldtests/Lists/Sublists.html delete mode 100644 oldtests/Lists/Sublists.markdown delete mode 100644 oldtests/Lists/TightAndLoose.html delete mode 100644 oldtests/Lists/TightAndLoose.markdown delete mode 100644 oldtests/Lists/TightLooseBlockquote.html delete mode 100644 oldtests/Lists/TightLooseBlockquote.markdown delete mode 100644 oldtests/Lists/TightLooseMore.html delete mode 100644 oldtests/Lists/TightLooseMore.markdown delete mode 100644 oldtests/Lists/TwoBlankLinesEndList.html delete mode 100644 oldtests/Lists/TwoBlankLinesEndList.markdown delete mode 100644 oldtests/Makefile delete mode 100644 oldtests/Misc/BackslashEscapes.html delete mode 100644 oldtests/Misc/BackslashEscapes.markdown delete mode 100644 oldtests/Misc/Laziness.html delete mode 100644 oldtests/Misc/Laziness.markdown delete mode 100644 oldtests/Misc/LineBreaks.html delete mode 100644 oldtests/Misc/LineBreaks.markdown delete mode 100644 oldtests/Misc/Transitions.html delete mode 100644 oldtests/Misc/Transitions.markdown delete mode 100644 oldtests/Original/Amps_and_angle_encoding.html delete mode 100644 
oldtests/Original/Amps_and_angle_encoding.markdown delete mode 100644 oldtests/Original/Auto_links.html delete mode 100644 oldtests/Original/Auto_links.markdown delete mode 100644 oldtests/Original/Backslash_escapes.html delete mode 100644 oldtests/Original/Backslash_escapes.markdown delete mode 100644 oldtests/Original/Blockquotes_with_code_blocks.html delete mode 100644 oldtests/Original/Blockquotes_with_code_blocks.markdown delete mode 100644 oldtests/Original/Code_Blocks.html delete mode 100644 oldtests/Original/Code_Blocks.markdown delete mode 100644 oldtests/Original/Code_Spans.html delete mode 100644 oldtests/Original/Code_Spans.markdown delete mode 100644 oldtests/Original/Horizontal_rules.html delete mode 100644 oldtests/Original/Horizontal_rules.markdown delete mode 100644 oldtests/Original/Images.html delete mode 100644 oldtests/Original/Images.markdown delete mode 100644 oldtests/Original/Inline_HTML_Advanced.html delete mode 100644 oldtests/Original/Inline_HTML_Advanced.markdown delete mode 100644 oldtests/Original/Inline_HTML_Simple.html delete mode 100644 oldtests/Original/Inline_HTML_Simple.markdown delete mode 100644 oldtests/Original/Inline_HTML_comments.html delete mode 100644 oldtests/Original/Inline_HTML_comments.markdown delete mode 100644 oldtests/Original/Links_inline_style.html delete mode 100644 oldtests/Original/Links_inline_style.markdown delete mode 100644 oldtests/Original/Links_reference_style.html delete mode 100644 oldtests/Original/Links_reference_style.markdown delete mode 100644 oldtests/Original/Links_shortcut_references.html delete mode 100644 oldtests/Original/Links_shortcut_references.markdown delete mode 100644 oldtests/Original/Literal_quotes_in_titles.html delete mode 100644 oldtests/Original/Literal_quotes_in_titles.markdown delete mode 100644 oldtests/Original/Markdown_Documentation_Basics.html delete mode 100644 oldtests/Original/Markdown_Documentation_Basics.markdown delete mode 100644 
oldtests/Original/Markdown_Documentation_Syntax.html delete mode 100644 oldtests/Original/Markdown_Documentation_Syntax.markdown delete mode 100644 oldtests/Original/Nested_blockquotes.html delete mode 100644 oldtests/Original/Nested_blockquotes.markdown delete mode 100644 oldtests/Original/Ordered_and_unordered_lists.html delete mode 100644 oldtests/Original/Ordered_and_unordered_lists.markdown delete mode 100644 oldtests/Original/README delete mode 100644 oldtests/Original/Strong_and_em_together.html delete mode 100644 oldtests/Original/Strong_and_em_together.markdown delete mode 100644 oldtests/Original/Tabs.html delete mode 100644 oldtests/Original/Tabs.markdown delete mode 100644 oldtests/Original/Tidyness.html delete mode 100644 oldtests/Original/Tidyness.markdown delete mode 100644 oldtests/Tabs/TabConversionUnicode.html delete mode 100644 oldtests/Tabs/TabConversionUnicode.markdown (limited to 'Makefile') diff --git a/Makefile b/Makefile index 671d30d..8d35b9d 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ DATADIR?=data PROG?=./stmd -.PHONY: all oldtests test spec benchjs testjs +.PHONY: all test spec benchjs testjs all: $(SRCDIR)/case_fold_switch.inc $(PROG) README.html: README.md template.html @@ -28,9 +28,6 @@ spec.pdf: spec.md template.tex specfilter.hs --number-sections -V documentclass=report -V tocdepth=2 \ -V classoption=twosides -oldtests: - make -C oldtests --quiet clean all - test: spec.txt perl runtests.pl $< $(PROG) @@ -63,7 +60,8 @@ dingus: cd js && echo "Starting dingus server at http://localhost:9000" && python -m SimpleHTTPServer 9000 leakcheck: $(PROG) - cat oldtests/*/*.markdown | valgrind --leak-check=full --dsymutil=yes $(PROG) + # TODO produce leaktest.md that tests everything + cat leaktest.md | valgrind --leak-check=full --dsymutil=yes $(PROG) operf: $(PROG) operf $(PROG) /dev/null diff --git a/oldtests/Blockquotes/Indents.html b/oldtests/Blockquotes/Indents.html deleted file mode 100644 index fd98ee8..0000000 --- 
a/oldtests/Blockquotes/Indents.html +++ /dev/null @@ -1,12 +0,0 @@ -
    -

    one -blockquote

    -
    -
    -
    -
    -

    triply nested -triply nested

    -
    -
    -
    diff --git a/oldtests/Blockquotes/Indents.markdown b/oldtests/Blockquotes/Indents.markdown deleted file mode 100644 index f9342ff..0000000 --- a/oldtests/Blockquotes/Indents.markdown +++ /dev/null @@ -1,5 +0,0 @@ -> one - > blockquote - ->>> triply nested - > > > triply nested diff --git a/oldtests/Blockquotes/Nesting.html b/oldtests/Blockquotes/Nesting.html deleted file mode 100644 index f40e999..0000000 --- a/oldtests/Blockquotes/Nesting.html +++ /dev/null @@ -1,32 +0,0 @@ -

    These are all equivalent:

    -
    -
    -

    nested -blockquote

    -
    -
    -
    -
    -

    nested -blockquote

    -
    -
    -
    -
    -

    nested -blockquote

    -
    -
    -
    -
    -

    nested -blockquote

    -
    -
    -

    This is not:

    -
    -

    nested

    -
    -

    blockquote

    -
    -
    diff --git a/oldtests/Blockquotes/Nesting.markdown b/oldtests/Blockquotes/Nesting.markdown deleted file mode 100644 index 3d67843..0000000 --- a/oldtests/Blockquotes/Nesting.markdown +++ /dev/null @@ -1,22 +0,0 @@ -These are all equivalent: - -> > nested -> > blockquote - - ->> nested ->> blockquote - - -> > nested -blockquote - - -> > nested -> blockquote - - -This is not: - -> nested -> > blockquote diff --git a/oldtests/Blockquotes/Separation.html b/oldtests/Blockquotes/Separation.html deleted file mode 100644 index 910d545..0000000 --- a/oldtests/Blockquotes/Separation.html +++ /dev/null @@ -1,39 +0,0 @@ -

    One blockquote, two paragraphs:

    -
    -

    one

    -

    two

    -
    -

    Two blockquotes:

    -
    -

    one

    -
    -
    -

    two

    -
    -

    Nested blockquote, two paragraphs:

    -
    -
    -

    one

    -

    two

    -
    -
    -

    Nested blockquote, two blockquotes:

    -
    -
    -

    one

    -
    -
    -

    two

    -
    -
    -

    Two nested blockquotes:

    -
    -
    -

    one

    -
    -
    -
    -
    -

    two

    -
    -
    diff --git a/oldtests/Blockquotes/Separation.markdown b/oldtests/Blockquotes/Separation.markdown deleted file mode 100644 index 823d865..0000000 --- a/oldtests/Blockquotes/Separation.markdown +++ /dev/null @@ -1,29 +0,0 @@ -One blockquote, two paragraphs: - -> one -> -> two - -Two blockquotes: - -> one - -> two - -Nested blockquote, two paragraphs: - -> > one -> > -> > two - -Nested blockquote, two blockquotes: - -> > one -> -> > two - -Two nested blockquotes: - -> > one - -> > two diff --git a/oldtests/Code/BlankLines.html b/oldtests/Code/BlankLines.html deleted file mode 100644 index ae0abf7..0000000 --- a/oldtests/Code/BlankLines.html +++ /dev/null @@ -1,33 +0,0 @@ -
    foo
    -
    -
    -
    -bar
    -
    -
    -
    foo
    -
    -
    -
    -bar
    -
    -
    -
    foo
    -
    -    
    -
    -bar
    -
    -
      -
    1. One

      -
      CodeA
      -
      -CodeB
      -
    2. -
    3. Two

      -
      CodeA
      -
    4. -
    -
      -
    1. One
    2. -
    diff --git a/oldtests/Code/BlankLines.markdown b/oldtests/Code/BlankLines.markdown deleted file mode 100644 index b0d5a0c..0000000 --- a/oldtests/Code/BlankLines.markdown +++ /dev/null @@ -1,28 +0,0 @@ - foo - - - - bar -> foo -> -> -> -> bar - foo - - - - bar - -1. One - - CodeA - - CodeB - -2. Two - - CodeA - - -1. One diff --git a/oldtests/Code/BlankLinesAtEnd.html b/oldtests/Code/BlankLinesAtEnd.html deleted file mode 100644 index ac803d9..0000000 --- a/oldtests/Code/BlankLinesAtEnd.html +++ /dev/null @@ -1,14 +0,0 @@ -
      -
    • List

      -
      code
      -
    • -
    -
      -
    • one
    • -
    • two
    • -
    -
      -
    • one -not code

    • -
    • two

    • -
    diff --git a/oldtests/Code/BlankLinesAtEnd.markdown b/oldtests/Code/BlankLinesAtEnd.markdown deleted file mode 100644 index 55879ae..0000000 --- a/oldtests/Code/BlankLinesAtEnd.markdown +++ /dev/null @@ -1,14 +0,0 @@ -* List - - code - - - * one - * two - - - -* one - not code - -* two diff --git a/oldtests/Code/FenceMatching.html b/oldtests/Code/FenceMatching.html deleted file mode 100644 index 4c7468e..0000000 --- a/oldtests/Code/FenceMatching.html +++ /dev/null @@ -1,8 +0,0 @@ -
    ```
    -
    -
    
    -`````
    -
    -````
    -
    -
    diff --git a/oldtests/Code/FenceMatching.markdown b/oldtests/Code/FenceMatching.markdown deleted file mode 100644 index d86169a..0000000 --- a/oldtests/Code/FenceMatching.markdown +++ /dev/null @@ -1,10 +0,0 @@ -````abc -``` -```` -``````blah - -````` - -```` - -``````````` diff --git a/oldtests/Code/FencedCodeBlocks.html b/oldtests/Code/FencedCodeBlocks.html deleted file mode 100644 index 4813d72..0000000 --- a/oldtests/Code/FencedCodeBlocks.html +++ /dev/null @@ -1,24 +0,0 @@ -

    This is a fenced code block:

    -
    pairs :: [(Int,Char)]
    -pairs = [(x,y) | x <- [0..10], y <- ['a'..'z']]
    -
    -

    Here is one with tildes:

    -
    pairs :: [(Int,Char)]
    -pairs = [(x,y) | x <- [0..10], y <- ['a'..'z']]
    -
    -

    More metadata:

    -
    pairs :: [(Int,Char)]
    -pairs = [(x,y) | x <- [0..10], y <- ['a'..'z']]
    -
    -

    More backticks:

    -
    pairs :: [(Int,Char)]
    -pairs = [(x,y) | x <- [0..10], y <- ['a'..'z']]
    -
    -backticks :: String
    -backticks = "`````"
    -
    -

    Without an end:

    -
    code with
    -no end
    -
    -
    diff --git a/oldtests/Code/FencedCodeBlocks.markdown b/oldtests/Code/FencedCodeBlocks.markdown deleted file mode 100644 index 6ccc6be..0000000 --- a/oldtests/Code/FencedCodeBlocks.markdown +++ /dev/null @@ -1,35 +0,0 @@ -This is a fenced code block: -```haskell -pairs :: [(Int,Char)] -pairs = [(x,y) | x <- [0..10], y <- ['a'..'z']] -``` -Here is one with tildes: - -~~~ haskell -pairs :: [(Int,Char)] -pairs = [(x,y) | x <- [0..10], y <- ['a'..'z']] -~~~ - -More metadata: - -```haskell numberLines start=50 -pairs :: [(Int,Char)] -pairs = [(x,y) | x <- [0..10], y <- ['a'..'z']] -``` - -More backticks: - -```````` haskell -pairs :: [(Int,Char)] -pairs = [(x,y) | x <- [0..10], y <- ['a'..'z']] - -backticks :: String -backticks = "`````" -````````````` - -Without an end: - -``` -code with -no end - diff --git a/oldtests/Code/IndentedCodeBlocks.html b/oldtests/Code/IndentedCodeBlocks.html deleted file mode 100644 index 0b9b7e7..0000000 --- a/oldtests/Code/IndentedCodeBlocks.html +++ /dev/null @@ -1,22 +0,0 @@ -

    Indented code with two space indent in first and last line:

    -
      two spaces *hello*
    -{ more }
    -
    -  and
    -
    -

    Indented code requires a leading/trailing blank line: -quick-command --option "$*"

    -

    Indented code does not require a trailing blank line:

    -
    code
    -
    -

    and not code.

    -

    Code in blockquote:

    -
    -
    code
    -
    -
    -

    Code in list:

    -
      -
    1. code
      -
    2. -
    diff --git a/oldtests/Code/IndentedCodeBlocks.markdown b/oldtests/Code/IndentedCodeBlocks.markdown deleted file mode 100644 index 2a99db0..0000000 --- a/oldtests/Code/IndentedCodeBlocks.markdown +++ /dev/null @@ -1,22 +0,0 @@ -Indented code with two space indent in first and last line: - - two spaces *hello* - { more } - - and - -Indented code requires a leading/trailing blank line: - quick-command --option "$*" - -Indented code does not require a trailing blank line: - - code -and not code. - -Code in blockquote: - -> code - -Code in list: - -1. code diff --git a/oldtests/Code/IndentedFences.html b/oldtests/Code/IndentedFences.html deleted file mode 100644 index 66e76da..0000000 --- a/oldtests/Code/IndentedFences.html +++ /dev/null @@ -1,20 +0,0 @@ -
    a
    -
    -
    z
    -
    -
    a
    -a
    -a
    - a
    -
    -
      -
    • foo

      -
           Hello
      -
      -World
      -
    • -
    -
    -
    a
    -
    -
    diff --git a/oldtests/Code/IndentedFences.markdown b/oldtests/Code/IndentedFences.markdown deleted file mode 100644 index 098545f..0000000 --- a/oldtests/Code/IndentedFences.markdown +++ /dev/null @@ -1,26 +0,0 @@ - ``` - a - ``` - - ``` -z -``` - - ``` -a - a - a - a - ``` - -* foo - - ``` - Hello - - World - ``` - -> ``` ->a ->``` diff --git a/oldtests/Code/IndentedInLists.html b/oldtests/Code/IndentedInLists.html deleted file mode 100644 index 76ed424..0000000 --- a/oldtests/Code/IndentedInLists.html +++ /dev/null @@ -1,22 +0,0 @@ -
      -
    • code starts here
      -
    • -
    -
      -
    1. foo

      -
      code starts here
      -
    2. -
    3. foo

      -
      code starts here
      -
    4. -
    -
      -
    • foo

      -
      code starts here
      -
      -
        -
      • foo

        -
        code starts here
        -
      • -
    • -
    diff --git a/oldtests/Code/IndentedInLists.markdown b/oldtests/Code/IndentedInLists.markdown deleted file mode 100644 index 54e1af1..0000000 --- a/oldtests/Code/IndentedInLists.markdown +++ /dev/null @@ -1,17 +0,0 @@ -- code starts here - -1. foo - - code starts here - -2. foo - - code starts here - -- foo - - code starts here - - - foo - - code starts here diff --git a/oldtests/Code/Inline.html b/oldtests/Code/Inline.html deleted file mode 100644 index 9c52790..0000000 --- a/oldtests/Code/Inline.html +++ /dev/null @@ -1,13 +0,0 @@ -

    All of these are equivalent:

    -
      -
    • *hi*
    • -
    • *hi*
    • -
    • *hi*
    • -
    • *hi*
    • -
    • *hi*
    • -
    -

    Backticks in code spans:

    -
      -
    • ``code``
    • -
    • ``code``
    • -
    diff --git a/oldtests/Code/Inline.markdown b/oldtests/Code/Inline.markdown deleted file mode 100644 index 38e5b0c..0000000 --- a/oldtests/Code/Inline.markdown +++ /dev/null @@ -1,13 +0,0 @@ -All of these are equivalent: - -- `*hi*` -- ` *hi* ` -- ``*hi* `` -- ````*hi*```` -- `*hi* - ` - -Backticks in code spans: - -- ``` ``code`` ``` -- ` ``code`` ` diff --git a/oldtests/Code/ListBreakAfter.html b/oldtests/Code/ListBreakAfter.html deleted file mode 100644 index 29d6d5e..0000000 --- a/oldtests/Code/ListBreakAfter.html +++ /dev/null @@ -1,30 +0,0 @@ -
      -
    • foo

      -
        -
      • bar

        -
        code1
        -code2
        -
        -

        code?

      • -
    • -
    • foo

      -
        -
      • bar

        -
        code1
        -code2
        -
      • -
    • -
    -
    code?
    -
    -
      -
    • foo -
        -
      • bar

        -
        code1
        -code2
        -
      • -
    • -
    -
    code?
    -
    diff --git a/oldtests/Code/ListBreakAfter.markdown b/oldtests/Code/ListBreakAfter.markdown deleted file mode 100644 index 4fa79f1..0000000 --- a/oldtests/Code/ListBreakAfter.markdown +++ /dev/null @@ -1,26 +0,0 @@ -* foo - * bar - - code1 - code2 - - code? - -* foo - * bar - - code1 - code2 - - - code? - -* foo - * bar - - code1 - code2 - - - - code? diff --git a/oldtests/Code/WhiteLines.html b/oldtests/Code/WhiteLines.html deleted file mode 100644 index 7fa137f..0000000 --- a/oldtests/Code/WhiteLines.html +++ /dev/null @@ -1,7 +0,0 @@ -
    ABC
    -  
    - 
    -
    -DEF
    -
    -

    GHI

    diff --git a/oldtests/Code/WhiteLines.markdown b/oldtests/Code/WhiteLines.markdown deleted file mode 100644 index ea17af7..0000000 --- a/oldtests/Code/WhiteLines.markdown +++ /dev/null @@ -1,9 +0,0 @@ - ABC - - - - DEF - - - -GHI diff --git a/oldtests/Emphasis/Escapes.html b/oldtests/Emphasis/Escapes.html deleted file mode 100644 index 17c9e2d..0000000 --- a/oldtests/Emphasis/Escapes.html +++ /dev/null @@ -1 +0,0 @@ -

    hi* there

    diff --git a/oldtests/Emphasis/Escapes.markdown b/oldtests/Emphasis/Escapes.markdown deleted file mode 100644 index 4f14698..0000000 --- a/oldtests/Emphasis/Escapes.markdown +++ /dev/null @@ -1 +0,0 @@ -*hi\* there* \ No newline at end of file diff --git a/oldtests/Emphasis/NestedEmphAndStrong.html b/oldtests/Emphasis/NestedEmphAndStrong.html deleted file mode 100644 index b41b527..0000000 --- a/oldtests/Emphasis/NestedEmphAndStrong.html +++ /dev/null @@ -1,66 +0,0 @@ -
      -
    1. test test
    2. -
    3. test test
    4. -
    5. test test
    6. -
    7. test test
    8. -
    9. test test
    10. -
    11. test test
    12. -
    13. test test
    14. -
    15. test test
    16. -
    17. test test
    18. -
    19. test test
    20. -
    21. test test
    22. -
    23. test test
    24. -
    25. test test
    26. -
    27. test test
    28. -
    29. test test
    30. -
    31. test test
    32. -
    -

    Incorrect nesting:

    -
      -
    1. *test test* test
    2. -
    3. _test test_ test
    4. -
    5. **test test* test*
    6. -
    7. __test␣test_␣test_
    8. -
    9. test test test
    10. -
    11. test test test
    12. -
    13. test test test
    14. -
    15. test test test
    16. -
    -

    No emphasis:

    -
      -
    1. test* test *test
    2. -
    3. test** test **test
    4. -
    5. test_ test _test
    6. -
    7. test__ test __test
    8. -
    -

    Middle-word emphasis (asterisks):

    -
      -
    1. ab
    2. -
    3. ab
    4. -
    5. abc
    6. -
    7. ab
    8. -
    9. ab
    10. -
    11. abc
    12. -
    -

    Middle-word emphasis (underscore):

    -
      -
    1. _a_b
    2. -
    3. a_b_
    4. -
    5. a_b_c
    6. -
    7. __a__b
    8. -
    9. a__b__
    10. -
    11. a__b__c
    12. -
    13. my_precious_file.txt
    14. -
    -

    Tricky Cases:

    -
      -
    1. E**. Test TestTestTest
    2. -
    3. E**. Test Test Test Test
    4. -
    -

    Overlong emphasis:

    -

    Name: ____________
    -Organization: ____
    -Region/Country: __

    -

    _____Cut here_____

    -

    ____Cut here____

    diff --git a/oldtests/Emphasis/NestedEmphAndStrong.markdown b/oldtests/Emphasis/NestedEmphAndStrong.markdown deleted file mode 100644 index ec7da25..0000000 --- a/oldtests/Emphasis/NestedEmphAndStrong.markdown +++ /dev/null @@ -1,69 +0,0 @@ -1. ***test test*** -2. ___test test___ -3. *test **test*** -4. **test *test*** -5. ***test* test** -6. ***test** test* -7. ***test* test** -8. **test *test*** -9. *test **test*** -10. _test __test___ -11. __test _test___ -12. ___test_ test__ -13. ___test__ test_ -14. ___test_ test__ -15. __test _test___ -16. _test __test___ - -Incorrect nesting: - -1. *test **test* test** -2. _test __test_ test__ -3. **test *test** test* -4. __test _test__ test_ -5. *test *test* test* -6. _test _test_ test_ -7. **test **test** test** -8. __test __test__ test__ - -No emphasis: - -1. test* test *test -2. test** test **test -3. test_ test _test -4. test__ test __test - -Middle-word emphasis (asterisks): - -1. *a*b -2. a*b* -3. a*b*c -4. **a**b -5. a**b** -6. a**b**c - -Middle-word emphasis (underscore): - -1. _a_b -2. a_b_ -3. a_b_c -4. __a__b -5. a__b__ -6. a__b__c -7. my_precious_file.txt - -Tricky Cases: - -1. E**. **Test** TestTestTest -2. E**. **Test** Test Test Test - -Overlong emphasis: - -Name: ____________ -Organization: ____ -Region/Country: __ - -_____Cut here_____ - -____Cut here____ - diff --git a/oldtests/Emphasis/Pathological.html b/oldtests/Emphasis/Pathological.html deleted file mode 100644 index 37eb9fa..0000000 --- a/oldtests/Emphasis/Pathological.html +++ /dev/null @@ -1,24 +0,0 @@ -

    This input can take a long time to parse in some implementations.

    -

    *a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -aaaaa

    -

    aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa**

    diff --git a/oldtests/Emphasis/Pathological.markdown b/oldtests/Emphasis/Pathological.markdown deleted file mode 100644 index 5deb95e..0000000 --- a/oldtests/Emphasis/Pathological.markdown +++ /dev/null @@ -1,26 +0,0 @@ -This input can take a long time to parse in some implementations. - -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -*a -aaaaa - -*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a**a*a** diff --git a/oldtests/Emphasis/Punctuation.html b/oldtests/Emphasis/Punctuation.html deleted file mode 100644 index 6061b81..0000000 --- a/oldtests/Emphasis/Punctuation.html +++ /dev/null @@ -1,10 +0,0 @@ -

    Here is a _ that is cool.

    -

    Foo.

    -

    Foo.

    -

    Foo.

    -

    Foo.

    -

    Foo.

    -

    Foo.

    -

    Foo. Foo? Foo! Foo: Foo; (Foo)

    -

    Foo. Foo? Foo! Foo: Foo; (Foo)

    -

    Foo. Foo? Foo! Foo: Foo; (Foo)

    diff --git a/oldtests/Emphasis/Punctuation.markdown b/oldtests/Emphasis/Punctuation.markdown deleted file mode 100644 index e3f23b8..0000000 --- a/oldtests/Emphasis/Punctuation.markdown +++ /dev/null @@ -1,19 +0,0 @@ -Here is a _ that is _cool_. - -_Foo._ - -__Foo.__ - -___Foo.___ - -_Foo_. - -__Foo__. - -___Foo___. - -_Foo_. _Foo_? _Foo_! _Foo_: _Foo_; (_Foo_) - -__Foo__. __Foo__? __Foo__! __Foo__: __Foo__; (__Foo__) - -___Foo___. ___Foo___? ___Foo___! ___Foo___: ___Foo___; (___Foo___) diff --git a/oldtests/HTML/Blocks.html b/oldtests/HTML/Blocks.html deleted file mode 100644 index dc80335..0000000 --- a/oldtests/HTML/Blocks.html +++ /dev/null @@ -1,18 +0,0 @@ -
    -
    - *raw html* -
    -
    -
    -
    -

    this is markdown

    -
    -
    - - -
    -* raw html with trailing space -
    diff --git a/oldtests/HTML/Blocks.markdown b/oldtests/HTML/Blocks.markdown deleted file mode 100644 index a83fa66..0000000 --- a/oldtests/HTML/Blocks.markdown +++ /dev/null @@ -1,26 +0,0 @@ -
    -
    - *raw html* -
    -
    - -
    -
    - -*this is markdown* - -
    -
    - - - - - -
    -* raw html with trailing space -
    diff --git a/oldtests/HTML/Inline.html b/oldtests/HTML/Inline.html deleted file mode 100644 index 94d40ac..0000000 --- a/oldtests/HTML/Inline.html +++ /dev/null @@ -1,8 +0,0 @@ -

    hi -hi - -Hello there. -A line
    break. -

    diff --git a/oldtests/HTML/Inline.markdown b/oldtests/HTML/Inline.markdown deleted file mode 100644 index 2259421..0000000 --- a/oldtests/HTML/Inline.markdown +++ /dev/null @@ -1,8 +0,0 @@ -hi -`hi` - -Hello there. -A line
    break. - diff --git a/oldtests/HTML/UppercaseTags.html b/oldtests/HTML/UppercaseTags.html deleted file mode 100644 index 8d2d828..0000000 --- a/oldtests/HTML/UppercaseTags.html +++ /dev/null @@ -1,4 +0,0 @@ -
    -this is a block -
    -

    Here is some inline html.

    diff --git a/oldtests/HTML/UppercaseTags.markdown b/oldtests/HTML/UppercaseTags.markdown deleted file mode 100644 index b476ffb..0000000 --- a/oldtests/HTML/UppercaseTags.markdown +++ /dev/null @@ -1,5 +0,0 @@ -
    -this is a block -
    - -Here is some inline html. \ No newline at end of file diff --git a/oldtests/Headers/ATX.html b/oldtests/Headers/ATX.html deleted file mode 100644 index f375b98..0000000 --- a/oldtests/Headers/ATX.html +++ /dev/null @@ -1,14 +0,0 @@ -

    One

    -

    Two

    -

    Three

    -

    Four

    -
    Five
    -
    Six
    -

    ####### Seven

    -

    Three with

    -

    Spacing doesn't matter

    -

    Escaped final #

    -

    ## Not a header

    -

    #5 not a header

    -

    -

    (empty header)

    diff --git a/oldtests/Headers/ATX.markdown b/oldtests/Headers/ATX.markdown deleted file mode 100644 index f687aa5..0000000 --- a/oldtests/Headers/ATX.markdown +++ /dev/null @@ -1,20 +0,0 @@ -# One -## Two -### Three -#### Four -##### Five - -###### Six - -####### Seven - -### Three with ### -## Spacing doesn't matter ## -## Escaped final \## - -\## Not a header - -#5 not a header - -## -(empty header) diff --git a/oldtests/Headers/Setext.html b/oldtests/Headers/Setext.html deleted file mode 100644 index 787fb02..0000000 --- a/oldtests/Headers/Setext.html +++ /dev/null @@ -1,9 +0,0 @@ -

    Level one

    -

    Two

    -

    In a paragraph

    -

    Level two

    -

    more text

    -

    ====== -no empty headers

    -

    not a header

    -
    diff --git a/oldtests/Headers/Setext.markdown b/oldtests/Headers/Setext.markdown deleted file mode 100644 index da0c7e2..0000000 --- a/oldtests/Headers/Setext.markdown +++ /dev/null @@ -1,17 +0,0 @@ -Level one -========= - -Two ---- - -In a paragraph - -Level two ---------- -more text - -====== -no empty headers - -not a header ------------- ----- diff --git a/oldtests/Links/AngleBrackets.html b/oldtests/Links/AngleBrackets.html deleted file mode 100644 index 21ac00d..0000000 --- a/oldtests/Links/AngleBrackets.html +++ /dev/null @@ -1,3 +0,0 @@ -

    silly URL with angle brackets.

    -

    link.

    -

    link.

    diff --git a/oldtests/Links/AngleBrackets.markdown b/oldtests/Links/AngleBrackets.markdown deleted file mode 100644 index c2e06ff..0000000 --- a/oldtests/Links/AngleBrackets.markdown +++ /dev/null @@ -1,7 +0,0 @@ -[silly URL with angle brackets](). - -[link]( "title"). - -[link][]. - -[link]: "title" diff --git a/oldtests/Links/AutoLinks.html b/oldtests/Links/AutoLinks.html deleted file mode 100644 index 092353f..0000000 --- a/oldtests/Links/AutoLinks.html +++ /dev/null @@ -1,7 +0,0 @@ -

    http://google.com?query=blah&time=15 -someone.else@somedomain.com -ftp://old.ftp.server.edu -git://some.git.repo/project.git - -<http://not.an autolink> -<relative/not/autolink>

    diff --git a/oldtests/Links/AutoLinks.markdown b/oldtests/Links/AutoLinks.markdown deleted file mode 100644 index bf95b8d..0000000 --- a/oldtests/Links/AutoLinks.markdown +++ /dev/null @@ -1,7 +0,0 @@ - - - - - - - diff --git a/oldtests/Links/BackticksInLinks.html b/oldtests/Links/BackticksInLinks.html deleted file mode 100644 index ff70383..0000000 --- a/oldtests/Links/BackticksInLinks.html +++ /dev/null @@ -1 +0,0 @@ -

    the right bracket character (])

    diff --git a/oldtests/Links/BackticksInLinks.markdown b/oldtests/Links/BackticksInLinks.markdown deleted file mode 100644 index 539fd52..0000000 --- a/oldtests/Links/BackticksInLinks.markdown +++ /dev/null @@ -1 +0,0 @@ -[the right bracket character (`]`)](/url) diff --git a/oldtests/Links/CaseInsensitiveReferences.html b/oldtests/Links/CaseInsensitiveReferences.html deleted file mode 100644 index afe4557..0000000 --- a/oldtests/Links/CaseInsensitiveReferences.html +++ /dev/null @@ -1 +0,0 @@ -

    Толпой is a Russian word.

    diff --git a/oldtests/Links/CaseInsensitiveReferences.markdown b/oldtests/Links/CaseInsensitiveReferences.markdown deleted file mode 100644 index f9653b9..0000000 --- a/oldtests/Links/CaseInsensitiveReferences.markdown +++ /dev/null @@ -1,3 +0,0 @@ -[Толпой] is a Russian word. - -[ТОЛПОЙ]: /url diff --git a/oldtests/Links/Entities.html b/oldtests/Links/Entities.html deleted file mode 100644 index 252dadb..0000000 --- a/oldtests/Links/Entities.html +++ /dev/null @@ -1,2 +0,0 @@ -

    http://göögle.com

    -

    hi

    diff --git a/oldtests/Links/Entities.markdown b/oldtests/Links/Entities.markdown deleted file mode 100644 index d81ee36..0000000 --- a/oldtests/Links/Entities.markdown +++ /dev/null @@ -1,3 +0,0 @@ - - -[hi](/url "göögle & yahoo") diff --git a/oldtests/Links/InlineLinks.html b/oldtests/Links/InlineLinks.html deleted file mode 100644 index ae33f33..0000000 --- a/oldtests/Links/InlineLinks.html +++ /dev/null @@ -1,10 +0,0 @@ -
      -
    1. link
    2. -
    3. link
    4. -
    5. link
    6. -
    7. link with -linebreak.
    8. -
    9. link
    10. -
    11. [not a link] (/url)
    12. -
    diff --git a/oldtests/Links/InlineLinks.markdown b/oldtests/Links/InlineLinks.markdown deleted file mode 100644 index a822c4d..0000000 --- a/oldtests/Links/InlineLinks.markdown +++ /dev/null @@ -1,9 +0,0 @@ -1. [link](/url) -2. [link](/url "title") -3. [link](/url - "title") -4. [link *with -linebreak*](
    "title -with linebreak"). -5. [link](/url(withparens) 'and single quoted title') -6. [not a link] (/url) diff --git a/oldtests/Links/ParensInURLs.html b/oldtests/Links/ParensInURLs.html deleted file mode 100644 index 9cd6de7..0000000 --- a/oldtests/Links/ParensInURLs.html +++ /dev/null @@ -1,6 +0,0 @@ -

    Inline link 1 with parens.

    -

    Inline link 2 with parens.

    -

    Inline link 3 with non-escaped parens.

    -

    Inline link 4 with non-escaped parens.

    -

    Reference link 1 with parens.

    -

    Reference link 2 with parens.

    diff --git a/oldtests/Links/ParensInURLs.markdown b/oldtests/Links/ParensInURLs.markdown deleted file mode 100644 index bb7be4f..0000000 --- a/oldtests/Links/ParensInURLs.markdown +++ /dev/null @@ -1,14 +0,0 @@ -[Inline link 1 with parens](/url\(test\) "title"). - -[Inline link 2 with parens]( "title"). - -[Inline link 3 with non-escaped parens](/url(test) "title"). - -[Inline link 4 with non-escaped parens]( "title"). - -[Reference link 1 with parens][1]. - -[Reference link 2 with parens][2]. - - [1]: /url(test) "title" - [2]: "title" diff --git a/oldtests/Links/ReferenceLinks.html b/oldtests/Links/ReferenceLinks.html deleted file mode 100644 index 397cdb2..0000000 --- a/oldtests/Links/ReferenceLinks.html +++ /dev/null @@ -1,7 +0,0 @@ -
      -
    1. Link references can be defined anywhere.

    2. -
    -
    -

    another one

    -
    diff --git a/oldtests/Links/ReferenceLinks.markdown b/oldtests/Links/ReferenceLinks.markdown deleted file mode 100644 index ebcf5a9..0000000 --- a/oldtests/Links/ReferenceLinks.markdown +++ /dev/null @@ -1,10 +0,0 @@ -1. [Link references] can be defined anywhere. - - [Link references]: /url - (even in a list item) - -> [another] one -> -> [another]: -> /foo "can break -> lines" diff --git a/oldtests/Lists/CodeBlocksInLists.html b/oldtests/Lists/CodeBlocksInLists.html deleted file mode 100644 index fcd3e2a..0000000 --- a/oldtests/Lists/CodeBlocksInLists.html +++ /dev/null @@ -1,14 +0,0 @@ -
      -
    1. list item -code

    2. -
    3. list item

      -
      code
      -
    4. -
    5. code
      -
    6. -
    7. code
      -
    8. -
    9. code
      -code
      -
    10. -
    diff --git a/oldtests/Lists/CodeBlocksInLists.markdown b/oldtests/Lists/CodeBlocksInLists.markdown deleted file mode 100644 index 7730808..0000000 --- a/oldtests/Lists/CodeBlocksInLists.markdown +++ /dev/null @@ -1,18 +0,0 @@ -1. list item - code - -2. list item - ~~~ - code - ~~~ - -3. ~~~ - code - ~~~ - -4. ~~~ - code - ~~~ - -5. code - code diff --git a/oldtests/Lists/ConsecutiveLists.html b/oldtests/Lists/ConsecutiveLists.html deleted file mode 100644 index f8f9098..0000000 --- a/oldtests/Lists/ConsecutiveLists.html +++ /dev/null @@ -1,20 +0,0 @@ -
      -
    • one
    • -
    • one
    • -
    -
      -
    • two
    • -
    • two
    • -
    -
      -
    • three
    • -
    • three
    • -
    -
      -
    1. four
    2. -
    3. four
    4. -
    -
      -
    1. five
    2. -
    3. five
    4. -
    diff --git a/oldtests/Lists/ConsecutiveLists.markdown b/oldtests/Lists/ConsecutiveLists.markdown deleted file mode 100644 index c4faa54..0000000 --- a/oldtests/Lists/ConsecutiveLists.markdown +++ /dev/null @@ -1,10 +0,0 @@ -* one -* one -+ two -+ two -- three -- three -1. four -1. four -1) five -1) five diff --git a/oldtests/Lists/EmptyListItem.html b/oldtests/Lists/EmptyListItem.html deleted file mode 100644 index 2c23fe1..0000000 --- a/oldtests/Lists/EmptyListItem.html +++ /dev/null @@ -1,10 +0,0 @@ -
      -
    • one
    • -
    • -
    • three
    • -
    -
      -
    1. one
    2. -
    3. -
    4. three
    5. -
    diff --git a/oldtests/Lists/EmptyListItem.markdown b/oldtests/Lists/EmptyListItem.markdown deleted file mode 100644 index d30cbc3..0000000 --- a/oldtests/Lists/EmptyListItem.markdown +++ /dev/null @@ -1,7 +0,0 @@ -- one -- -- three - -1. one -2. -3. three diff --git a/oldtests/Lists/InBlockquote.html b/oldtests/Lists/InBlockquote.html deleted file mode 100644 index da233e8..0000000 --- a/oldtests/Lists/InBlockquote.html +++ /dev/null @@ -1,22 +0,0 @@ -
    -
      -
    • tight
    • -
    • tight
    • -
    -
    -
    -
      -
    • loose

    • -
    • loose

    • -
    -
    -
    -
      -
    • one-item list
    • -
    -
    -
    -
      -
    • one-item list
    • -
    -
    diff --git a/oldtests/Lists/InBlockquote.markdown b/oldtests/Lists/InBlockquote.markdown deleted file mode 100644 index 511563b..0000000 --- a/oldtests/Lists/InBlockquote.markdown +++ /dev/null @@ -1,12 +0,0 @@ -> - tight -> - tight - - -> - loose -> -> - loose - - -> - one-item list - -> - one-item list diff --git a/oldtests/Lists/Indents.html b/oldtests/Lists/Indents.html deleted file mode 100644 index a11a5a6..0000000 --- a/oldtests/Lists/Indents.html +++ /dev/null @@ -1,22 +0,0 @@ -
    -
      -
    • foo

      -

      bar

    • -
    -
    -
      -
    • one
    • -
    • two
    • -
    -
      -
    • one
    • -
    • two
    • -
    • three
    • -
    -
      -
    • one -
        -
      • two
      • -
    • -
    • three
    • -
    diff --git a/oldtests/Lists/Indents.markdown b/oldtests/Lists/Indents.markdown deleted file mode 100644 index 293d112..0000000 --- a/oldtests/Lists/Indents.markdown +++ /dev/null @@ -1,17 +0,0 @@ - > * foo -> -> bar - - - - one - - two - - -- one - - two -- three - - -- one - - two -- three diff --git a/oldtests/Lists/ListsAndHRs.html b/oldtests/Lists/ListsAndHRs.html deleted file mode 100644 index 40826f7..0000000 --- a/oldtests/Lists/ListsAndHRs.html +++ /dev/null @@ -1,7 +0,0 @@ -
      -
    • item 1 -
        -
      • item 2
      • -
    • -
    -
    diff --git a/oldtests/Lists/ListsAndHRs.markdown b/oldtests/Lists/ListsAndHRs.markdown deleted file mode 100644 index 19c07e7..0000000 --- a/oldtests/Lists/ListsAndHRs.markdown +++ /dev/null @@ -1,3 +0,0 @@ -* item 1 - * item 2 -* * * * * diff --git a/oldtests/Lists/ListsAndSetextHeaders.html b/oldtests/Lists/ListsAndSetextHeaders.html deleted file mode 100644 index c6af9eb..0000000 --- a/oldtests/Lists/ListsAndSetextHeaders.html +++ /dev/null @@ -1,6 +0,0 @@ -
      -
    1. item
    2. -
    3. item -Not header
    4. -
    -
    diff --git a/oldtests/Lists/ListsAndSetextHeaders.markdown b/oldtests/Lists/ListsAndSetextHeaders.markdown deleted file mode 100644 index acfa655..0000000 --- a/oldtests/Lists/ListsAndSetextHeaders.markdown +++ /dev/null @@ -1,4 +0,0 @@ -1. item -2. item -Not header ----------- diff --git a/oldtests/Lists/MultipleBlankLines.html b/oldtests/Lists/MultipleBlankLines.html deleted file mode 100644 index d894db1..0000000 --- a/oldtests/Lists/MultipleBlankLines.html +++ /dev/null @@ -1,56 +0,0 @@ -
      -
    1. First Item

      -
        -
      • one

        -
          -
        • two
        • -
      • -
      • one

        -
          -
        • two
        • -
      • -
    2. -
    3. Second Item

      -
        -
      • one -
          -
        • two
        • -
      • -
    4. -
    -
      -
    • one -
        -
      • two
      • -
    • -
    -
      -
    1. Third Item

      -
        -
      • one -
          -
        • two
        • -
      • -
    2. -
    -
      -
    • one -
        -
      • two
      • -
    • -
    -
      -
    1. Fourth Item

      -
        -
      • one -
          -
        • two
        • -
      • -
    2. -
    -
      -
    • one -
        -
      • two
      • -
    • -
    diff --git a/oldtests/Lists/MultipleBlankLines.markdown b/oldtests/Lists/MultipleBlankLines.markdown deleted file mode 100644 index e24a4f2..0000000 --- a/oldtests/Lists/MultipleBlankLines.markdown +++ /dev/null @@ -1,37 +0,0 @@ -1. First Item - - * one - * two - - * one - * two - -1. Second Item - - * one - * two - - - * one - * two - -1. Third Item - - * one - * two - - - - * one - * two - -1. Fourth Item - - * one - * two - - - - - * one - * two diff --git a/oldtests/Lists/Start.html b/oldtests/Lists/Start.html deleted file mode 100644 index 46e0550..0000000 --- a/oldtests/Lists/Start.html +++ /dev/null @@ -1,11 +0,0 @@ -
      -
    1. this list starts with 4.
    2. -
    3. and continues
    4. -
    5. the continuation number is irrelevant.
    6. -
    -
      -
    1. a space odyssey
    2. -
    -
      -
    1. standard lists get no start attribute
    2. -
    diff --git a/oldtests/Lists/Start.markdown b/oldtests/Lists/Start.markdown deleted file mode 100644 index 175226f..0000000 --- a/oldtests/Lists/Start.markdown +++ /dev/null @@ -1,7 +0,0 @@ -4. this list starts with 4. -5. and continues -1. the continuation number is irrelevant. - -2001) a space odyssey - -1. standard lists get no start attribute diff --git a/oldtests/Lists/Sublists.html b/oldtests/Lists/Sublists.html deleted file mode 100644 index af62915..0000000 --- a/oldtests/Lists/Sublists.html +++ /dev/null @@ -1,49 +0,0 @@ -

    Four levels:

    -
      -
    • one -
        -
      • two -
          -
        • three -
            -
          • four
          • -
        • -
      • -
    • -
    -
      -
    1. one -
        -
      1. two -
          -
        1. three -
            -
          1. four
          2. -
        2. -
      2. -
    2. -
    -
      -
    1. one -
        -
      • two -
          -
        1. three -
            -
          • four
          • -
        2. -
      • -
    2. -
    -
      -
    • one -
        -
      • two -
          -
        1. three -
            -
          1. four
          2. -
        2. -
      • -
    • -
    diff --git a/oldtests/Lists/Sublists.markdown b/oldtests/Lists/Sublists.markdown deleted file mode 100644 index 9eced9e..0000000 --- a/oldtests/Lists/Sublists.markdown +++ /dev/null @@ -1,24 +0,0 @@ -Four levels: - -- one - - two - - three - - four - - -1. one - 1. two - 1. three - 1. four - - -1) one - - two - 1) three - - four - - -- one - - two - 1. three - 1) four diff --git a/oldtests/Lists/TightAndLoose.html b/oldtests/Lists/TightAndLoose.html deleted file mode 100644 index 7792ebb..0000000 --- a/oldtests/Lists/TightAndLoose.html +++ /dev/null @@ -1,49 +0,0 @@ -
      -
    1. tight
    2. -
    -
      -
    • tight
    • -
    • list
    • -
    -
      -
    • loose

    • -
    • list

    • -
    -
      -
    1. tight
    2. -
    3. list
    4. -
    -
      -
    1. loose

    2. -
    3. list

    4. -
    -
      -
    1. loose

      -
        -
      • sublist
      • -
    2. -
    -
      -
    1. tight -
        -
      • sublist
      • -
    2. -
    -
      -
    • tight -
      -

      blockquote -and

      -
      -
      code
      -
    • -
    • tight
    • -
    -
      -
    • tight -
        -
      • with loose

      • -
      • sublist

      • -
    • -
    • tight
    • -
    diff --git a/oldtests/Lists/TightAndLoose.markdown b/oldtests/Lists/TightAndLoose.markdown deleted file mode 100644 index 263a34c..0000000 --- a/oldtests/Lists/TightAndLoose.markdown +++ /dev/null @@ -1,45 +0,0 @@ -1. tight - - -- tight -- list - - -- loose - -- list - - -1. tight -2. list - - -1. loose - -2. list - - -1. loose - - - sublist - - - -1. tight - - sublist - - -- tight - > blockquote - and - ``` - code - ``` -- tight - - -- tight - - with loose - - - sublist -- tight diff --git a/oldtests/Lists/TightLooseBlockquote.html b/oldtests/Lists/TightLooseBlockquote.html deleted file mode 100644 index 7e78214..0000000 --- a/oldtests/Lists/TightLooseBlockquote.html +++ /dev/null @@ -1,32 +0,0 @@ -
      -
    • tight I -
      -

      bq

      -
    • -
    • tight I
    • -
    -
      -
    • tight II -
      -

      bq

      -
      -foo
    • -
    • tight II
    • -
    -
      -
    1. Blank lines in bq don't break list -
      -

      bq

      -
    2. -
    3. Should say (2) in output
    4. -
    -
      -
    • Blank lines in bq don't break LI -
        -
      • item A -
        -

        bq

        -
      • -
      • item B
      • -
    • -
    diff --git a/oldtests/Lists/TightLooseBlockquote.markdown b/oldtests/Lists/TightLooseBlockquote.markdown deleted file mode 100644 index 08200cc..0000000 --- a/oldtests/Lists/TightLooseBlockquote.markdown +++ /dev/null @@ -1,25 +0,0 @@ -* tight I - > bq - > -* tight I - - -* tight II - > bq - > - foo -* tight II - -1. Blank lines in bq don't break list - > bq - > - > -1. Should say (2) in output - -* Blank lines in bq don't break LI - * item A - > bq - > - > - * item B - \ No newline at end of file diff --git a/oldtests/Lists/TightLooseMore.html b/oldtests/Lists/TightLooseMore.html deleted file mode 100644 index f26f457..0000000 --- a/oldtests/Lists/TightLooseMore.html +++ /dev/null @@ -1,7 +0,0 @@ -
      -
    • foo

      -
        -
      • bar
      • -
      -

      blah

    • -
    diff --git a/oldtests/Lists/TightLooseMore.markdown b/oldtests/Lists/TightLooseMore.markdown deleted file mode 100644 index 7ace63f..0000000 --- a/oldtests/Lists/TightLooseMore.markdown +++ /dev/null @@ -1,4 +0,0 @@ -* foo - * bar - - blah \ No newline at end of file diff --git a/oldtests/Lists/TwoBlankLinesEndList.html b/oldtests/Lists/TwoBlankLinesEndList.html deleted file mode 100644 index 629add1..0000000 --- a/oldtests/Lists/TwoBlankLinesEndList.html +++ /dev/null @@ -1,21 +0,0 @@ -
      -
    1. one

    2. -
    3. two

    4. -
    -
      -
    1. new list
    2. -
    -
    -
      -
    • one

    • -
    • two

    • -
    -
      -
    • new list
    • -
    -
    -
      -
    1. one
    2. -
    -
    code
    -
    diff --git a/oldtests/Lists/TwoBlankLinesEndList.markdown b/oldtests/Lists/TwoBlankLinesEndList.markdown deleted file mode 100644 index 2984a19..0000000 --- a/oldtests/Lists/TwoBlankLinesEndList.markdown +++ /dev/null @@ -1,20 +0,0 @@ -1. one - -2. two - - -1. new list - - -> - one -> -> - two -> -> -> - new list - - -1. one - - - code diff --git a/oldtests/Makefile b/oldtests/Makefile deleted file mode 100644 index c8a30bd..0000000 --- a/oldtests/Makefile +++ /dev/null @@ -1,55 +0,0 @@ -SHELL=/bin/bash -TESTDIR ?= * -PATT ?= . -TESTS=$(shell ls $(TESTDIR)/*.markdown | grep $(PATT)) -DIFFS=$(patsubst %.markdown,%.diff,$(TESTS)) -PROG ?= ../stmd -FILTER ?= perl -pe 's/ /␣/g' -TIDYCMD ?= tidy -asxhtml -utf8 --show-body-only yes --show-warnings no -quiet -DETAILS ?= 1 - -# Check to see if echo supports -e option to allow backslash escapes -ifeq ($(shell echo -e),-e) -ECHO=echo -else -ECHO=echo -e -endif - -all: $(DIFFS) - PASS=0;TESTS=0; \ - for f in $(DIFFS); do \ - let TESTS=TESTS+1; \ - [ -s $$f ] || let PASS=PASS+1; \ - done; \ - $(ECHO) "\033[1m$$PASS of $$TESTS tests passed.\033[0m"; \ - if [ $$TESTS -eq $$PASS ]; then exit 0; else exit 1; fi - -%.actual.html: %.markdown -ifeq ($(TIDY),1) - -cat $< | $(PROG) | $(TIDYCMD) > $@ -else - -cat $< | $(PROG) > $@ -endif - -%.expected.html: %.html -ifeq ($(TIDY),1) - -$(TIDYCMD) $< > $@ -else - cp $< $@ -endif - -%.diff: %.expected.html %.actual.html - diff --unified=1 <(cat $(word 1,$^) | $(FILTER)) <(cat $(word 2,$^) | $(FILTER)) > $@ ; \ - if [ -s $@ ]; then \ - $(ECHO) "\033[1;31m✘ $(patsubst %.diff,%,$@)\033[0m"; \ - if [ $(DETAILS) == "1" ]; then \ - $(ECHO) "\033[0;36m" ; cat $@; $(ECHO) "\033[0m"; \ - fi \ - else \ - $(ECHO) "\033[1;32m✓ $(patsubst %.diff,%,$@)\033[0m"; \ - fi - -.PHONY: all clean - -clean: - -@rm */*.{diff,actual.html,expected.html} diff --git a/oldtests/Misc/BackslashEscapes.html b/oldtests/Misc/BackslashEscapes.html deleted file mode 100644 index 3eb2aed..0000000 --- 
a/oldtests/Misc/BackslashEscapes.html +++ /dev/null @@ -1,14 +0,0 @@ -

    *not emphasized* -\emphasis -**not bold** -<br/> not a tag -[link](/foo) not a link -link -`not code`

    -

    1. not a list item

    -

    * not a list.

    -

    # Not a header

    -

    [foo]: /url "not a reference"

    -

    $ ^ ; can be escaped. -\a \b \T cannot. -unicode letters and symbols cannot: \π \‥.

    diff --git a/oldtests/Misc/BackslashEscapes.markdown b/oldtests/Misc/BackslashEscapes.markdown deleted file mode 100644 index 23496dc..0000000 --- a/oldtests/Misc/BackslashEscapes.markdown +++ /dev/null @@ -1,19 +0,0 @@ -\*not emphasized* -\\*emphasis* -\*\*not bold** -\
    not a tag -\[link](/foo) not a link -[link](/foo\) "title\"") -\`not code` - -1\. not a list item - -\* not a list. - -\# Not a header - -\[foo]: /url "not a reference" - -\$ \^ \; can be escaped. -\a \b \T cannot. -unicode letters and symbols cannot: \π \‥. diff --git a/oldtests/Misc/Laziness.html b/oldtests/Misc/Laziness.html deleted file mode 100644 index e130eb5..0000000 --- a/oldtests/Misc/Laziness.html +++ /dev/null @@ -1,22 +0,0 @@ -
    -
      -
    1. one -two
    2. -
    -
    -

    Laziness only affects paragraph continuations:

    -
    -
    code
    -
    -
    -
    not same code block
    -
    -
      -
    1. hello
    2. -
    -
    -
    -
    -
    -

    code

    -
    diff --git a/oldtests/Misc/Laziness.markdown b/oldtests/Misc/Laziness.markdown deleted file mode 100644 index 2c32870..0000000 --- a/oldtests/Misc/Laziness.markdown +++ /dev/null @@ -1,14 +0,0 @@ -> 1. one -> two - -Laziness only affects paragraph continuations: - -> code - not same code block - -1. hello ------ - -> ``` -code -``` diff --git a/oldtests/Misc/LineBreaks.html b/oldtests/Misc/LineBreaks.html deleted file mode 100644 index 2d85e85..0000000 --- a/oldtests/Misc/LineBreaks.html +++ /dev/null @@ -1,11 +0,0 @@ -

    Two spaces
    -break a line. Or more than two
    -and spaces in the following line are absorbed.

    -

    You can also break lines with
    -a backslash.

    -

    Two spaces at the end of a paragraph are -not a line break.

    -

    A backslash at the end of a paragraph is -not a line break.\

    -

    Similarly with setext headers

    -

    And with backslashes\

    diff --git a/oldtests/Misc/LineBreaks.markdown b/oldtests/Misc/LineBreaks.markdown deleted file mode 100644 index 3632dcb..0000000 --- a/oldtests/Misc/LineBreaks.markdown +++ /dev/null @@ -1,18 +0,0 @@ -Two spaces -break a line. Or more than two - and spaces in the following line are absorbed. - -You can also break lines with\ -a backslash. - -Two spaces at the end of a paragraph are -not a line break. - -A backslash at the end of a paragraph is -not a line break.\ - -Similarly with setext headers -------------------------------- - -And with backslashes\ ---------------------- diff --git a/oldtests/Misc/Transitions.html b/oldtests/Misc/Transitions.html deleted file mode 100644 index fceff9f..0000000 --- a/oldtests/Misc/Transitions.html +++ /dev/null @@ -1,26 +0,0 @@ -
    -

    blockquote

    -
    -

    blockquote

    -
    -
    -
      -
    1. list
    2. -
    3. list -
        -
      • sublist
      • -
    4. -
    -
    -

    paragraph

    -

    header

    -

    header

    -
    code
    -
    -
    code
    -
    -
    -
    -# not a header -
    -
    diff --git a/oldtests/Misc/Transitions.markdown b/oldtests/Misc/Transitions.markdown deleted file mode 100644 index 5f3a9d3..0000000 --- a/oldtests/Misc/Transitions.markdown +++ /dev/null @@ -1,20 +0,0 @@ -> blockquote -> > blockquote -1. list -2. list - - sublist -* * * * * -paragraph - -header ------- -### header - code -``` -code -``` -
    -
    -# not a header -
    -
    diff --git a/oldtests/Original/Amps_and_angle_encoding.html b/oldtests/Original/Amps_and_angle_encoding.html deleted file mode 100644 index fc1b2c3..0000000 --- a/oldtests/Original/Amps_and_angle_encoding.html +++ /dev/null @@ -1,9 +0,0 @@ -

    AT&T has an ampersand in their name.

    -

    AT&T is another way to write it.

    -

    This & that.

    -

    4 < 5.

    -

    6 > 5.

    -

    Here's a link with an ampersand in the URL.

    -

    Here's a link with an amersand in the link text: AT&T.

    -

    Here's an inline link.

    -

    Here's an inline link.

    diff --git a/oldtests/Original/Amps_and_angle_encoding.markdown b/oldtests/Original/Amps_and_angle_encoding.markdown deleted file mode 100644 index 0e9527f..0000000 --- a/oldtests/Original/Amps_and_angle_encoding.markdown +++ /dev/null @@ -1,21 +0,0 @@ -AT&T has an ampersand in their name. - -AT&T is another way to write it. - -This & that. - -4 < 5. - -6 > 5. - -Here's a [link] [1] with an ampersand in the URL. - -Here's a link with an amersand in the link text: [AT&T] [2]. - -Here's an inline [link](/script?foo=1&bar=2). - -Here's an inline [link](). - - -[1]: http://example.com/?foo=1&bar=2 -[2]: http://att.com/ "AT&T" \ No newline at end of file diff --git a/oldtests/Original/Auto_links.html b/oldtests/Original/Auto_links.html deleted file mode 100644 index f517fe6..0000000 --- a/oldtests/Original/Auto_links.html +++ /dev/null @@ -1,13 +0,0 @@ -

    Link: http://example.com/.

    -

    With an ampersand: http://example.com/?foo=1&bar=2

    - -
    -

    Blockquoted: http://example.com/

    -
    -

    Auto-links should not occur here: <http://example.com/>

    -
    or here: <http://example.com/>
    -
    diff --git a/oldtests/Original/Auto_links.markdown b/oldtests/Original/Auto_links.markdown deleted file mode 100644 index abbc488..0000000 --- a/oldtests/Original/Auto_links.markdown +++ /dev/null @@ -1,13 +0,0 @@ -Link: . - -With an ampersand: - -* In a list? -* -* It should. - -> Blockquoted: - -Auto-links should not occur here: `` - - or here: \ No newline at end of file diff --git a/oldtests/Original/Backslash_escapes.html b/oldtests/Original/Backslash_escapes.html deleted file mode 100644 index 9a83379..0000000 --- a/oldtests/Original/Backslash_escapes.html +++ /dev/null @@ -1,75 +0,0 @@ -

    These should all get escaped:

    -

    Backslash: \

    -

    Backtick: `

    -

    Asterisk: *

    -

    Underscore: _

    -

    Left brace: {

    -

    Right brace: }

    -

    Left bracket: [

    -

    Right bracket: ]

    -

    Left paren: (

    -

    Right paren: )

    -

    Greater-than: >

    -

    Hash: #

    -

    Period: .

    -

    Bang: !

    -

    Plus: +

    -

    Minus: -

    -

    These should not, because they occur within a code block:

    -
    Backslash: \\
    -
    -Backtick: \`
    -
    -Asterisk: \*
    -
    -Underscore: \_
    -
    -Left brace: \{
    -
    -Right brace: \}
    -
    -Left bracket: \[
    -
    -Right bracket: \]
    -
    -Left paren: \(
    -
    -Right paren: \)
    -
    -Greater-than: \>
    -
    -Hash: \#
    -
    -Period: \.
    -
    -Bang: \!
    -
    -Plus: \+
    -
    -Minus: \-
    -
    -

    Nor should these, which occur in code spans:

    -

    Backslash: \\

    -

    Backtick: \`

    -

    Asterisk: \*

    -

    Underscore: \_

    -

    Left brace: \{

    -

    Right brace: \}

    -

    Left bracket: \[

    -

    Right bracket: \]

    -

    Left paren: \(

    -

    Right paren: \)

    -

    Greater-than: \>

    -

    Hash: \#

    -

    Period: \.

    -

    Bang: \!

    -

    Plus: \+

    -

    Minus: \-

    -

    These should get escaped, even though they're matching pairs for -other Markdown constructs:

    -

    *asterisks*

    -

    _underscores_

    -

    `backticks`

    -

    This is a code span with a literal backslash-backtick sequence: \`

    -

    This is a tag with unescaped backticks bar.

    -

    This is a tag with backslashes bar.

    diff --git a/oldtests/Original/Backslash_escapes.markdown b/oldtests/Original/Backslash_escapes.markdown deleted file mode 100644 index 5b014cb..0000000 --- a/oldtests/Original/Backslash_escapes.markdown +++ /dev/null @@ -1,120 +0,0 @@ -These should all get escaped: - -Backslash: \\ - -Backtick: \` - -Asterisk: \* - -Underscore: \_ - -Left brace: \{ - -Right brace: \} - -Left bracket: \[ - -Right bracket: \] - -Left paren: \( - -Right paren: \) - -Greater-than: \> - -Hash: \# - -Period: \. - -Bang: \! - -Plus: \+ - -Minus: \- - - - -These should not, because they occur within a code block: - - Backslash: \\ - - Backtick: \` - - Asterisk: \* - - Underscore: \_ - - Left brace: \{ - - Right brace: \} - - Left bracket: \[ - - Right bracket: \] - - Left paren: \( - - Right paren: \) - - Greater-than: \> - - Hash: \# - - Period: \. - - Bang: \! - - Plus: \+ - - Minus: \- - - -Nor should these, which occur in code spans: - -Backslash: `\\` - -Backtick: `` \` `` - -Asterisk: `\*` - -Underscore: `\_` - -Left brace: `\{` - -Right brace: `\}` - -Left bracket: `\[` - -Right bracket: `\]` - -Left paren: `\(` - -Right paren: `\)` - -Greater-than: `\>` - -Hash: `\#` - -Period: `\.` - -Bang: `\!` - -Plus: `\+` - -Minus: `\-` - - -These should get escaped, even though they're matching pairs for -other Markdown constructs: - -\*asterisks\* - -\_underscores\_ - -\`backticks\` - -This is a code span with a literal backslash-backtick sequence: `` \` `` - -This is a tag with unescaped backticks bar. - -This is a tag with backslashes bar. diff --git a/oldtests/Original/Blockquotes_with_code_blocks.html b/oldtests/Original/Blockquotes_with_code_blocks.html deleted file mode 100644 index fd1cb1b..0000000 --- a/oldtests/Original/Blockquotes_with_code_blocks.html +++ /dev/null @@ -1,12 +0,0 @@ -
    -

    Example:

    -
    sub status {
    -    print "working";
    -}
    -
    -

    Or:

    -
    sub status {
    -    return "working";
    -}
    -
    -
    diff --git a/oldtests/Original/Blockquotes_with_code_blocks.markdown b/oldtests/Original/Blockquotes_with_code_blocks.markdown deleted file mode 100644 index c31d171..0000000 --- a/oldtests/Original/Blockquotes_with_code_blocks.markdown +++ /dev/null @@ -1,11 +0,0 @@ -> Example: -> -> sub status { -> print "working"; -> } -> -> Or: -> -> sub status { -> return "working"; -> } diff --git a/oldtests/Original/Code_Blocks.html b/oldtests/Original/Code_Blocks.html deleted file mode 100644 index 7d89615..0000000 --- a/oldtests/Original/Code_Blocks.html +++ /dev/null @@ -1,12 +0,0 @@ -
    code block on the first line
    -
    -

    Regular text.

    -
    code block indented by spaces
    -
    -

    Regular text.

    -
    the lines in this block  
    -all contain trailing spaces  
    -
    -

    Regular Text.

    -
    code block on the last line
    -
    diff --git a/oldtests/Original/Code_Blocks.markdown b/oldtests/Original/Code_Blocks.markdown deleted file mode 100644 index b54b092..0000000 --- a/oldtests/Original/Code_Blocks.markdown +++ /dev/null @@ -1,14 +0,0 @@ - code block on the first line - -Regular text. - - code block indented by spaces - -Regular text. - - the lines in this block - all contain trailing spaces - -Regular Text. - - code block on the last line \ No newline at end of file diff --git a/oldtests/Original/Code_Spans.html b/oldtests/Original/Code_Spans.html deleted file mode 100644 index 27acea1..0000000 --- a/oldtests/Original/Code_Spans.html +++ /dev/null @@ -1,3 +0,0 @@ -

    <test a=" content of attribute ">

    -

    Fix for backticks within HTML tag: like this

    -

    Here's how you put `backticks` in a code span.

    diff --git a/oldtests/Original/Code_Spans.markdown b/oldtests/Original/Code_Spans.markdown deleted file mode 100644 index 5c229c7..0000000 --- a/oldtests/Original/Code_Spans.markdown +++ /dev/null @@ -1,5 +0,0 @@ -`` - -Fix for backticks within HTML tag: like this - -Here's how you put `` `backticks` `` in a code span. \ No newline at end of file diff --git a/oldtests/Original/Horizontal_rules.html b/oldtests/Original/Horizontal_rules.html deleted file mode 100644 index a89efdb..0000000 --- a/oldtests/Original/Horizontal_rules.html +++ /dev/null @@ -1,39 +0,0 @@ -

    Dashes:

    -
    -
    -
    -
    -
    ---
    -
    -
    -
    -
    -
    -
    - - -
    -
    -

    Asterisks:

    -
    -
    -
    -
    -
    ***
    -
    -
    -
    -
    -
    -
    * * *
    -
    -

    Underscores:

    -
    -
    -
    -
    -
    ___
    -
    -
    -
    -
    -
    -
    _ _ _
    -
    diff --git a/oldtests/Original/Horizontal_rules.markdown b/oldtests/Original/Horizontal_rules.markdown deleted file mode 100644 index 1594bda..0000000 --- a/oldtests/Original/Horizontal_rules.markdown +++ /dev/null @@ -1,67 +0,0 @@ -Dashes: - ---- - - --- - - --- - - --- - - --- - -- - - - - - - - - - - - - - - - - - - - - - - - - -Asterisks: - -*** - - *** - - *** - - *** - - *** - -* * * - - * * * - - * * * - - * * * - - * * * - - -Underscores: - -___ - - ___ - - ___ - - ___ - - ___ - -_ _ _ - - _ _ _ - - _ _ _ - - _ _ _ - - _ _ _ diff --git a/oldtests/Original/Images.html b/oldtests/Original/Images.html deleted file mode 100644 index bd5a7e0..0000000 --- a/oldtests/Original/Images.html +++ /dev/null @@ -1,11 +0,0 @@ -

    Alt text

    -

    Alt text

    -

    Inline within a paragraph: alt text.

    -

    alt text

    -

    alt text

    -

    alt text

    -

    alt text.

    -

    Empty

    -

    this is a stupid URL

    -

    alt text

    -

    alt text

    diff --git a/oldtests/Original/Images.markdown b/oldtests/Original/Images.markdown deleted file mode 100644 index 5707590..0000000 --- a/oldtests/Original/Images.markdown +++ /dev/null @@ -1,26 +0,0 @@ -![Alt text](/path/to/img.jpg) - -![Alt text](/path/to/img.jpg "Optional title") - -Inline within a paragraph: [alt text](/url/). - -![alt text](/url/ "title preceded by two spaces") - -![alt text](/url/ "title has spaces afterward" ) - -![alt text]() - -![alt text]( "with a title"). - -![Empty]() - -![this is a stupid URL](http://example.com/(parens).jpg) - - -![alt text][foo] - - [foo]: /url/ - -![alt text][bar] - - [bar]: /url/ "Title here" \ No newline at end of file diff --git a/oldtests/Original/Inline_HTML_Advanced.html b/oldtests/Original/Inline_HTML_Advanced.html deleted file mode 100644 index 631c135..0000000 --- a/oldtests/Original/Inline_HTML_Advanced.html +++ /dev/null @@ -1,23 +0,0 @@ -

    Simple block on one line:

    -
    foo
    -

    And nested without indentation:

    -
    -
    -
    -foo -
    -
    -
    -
    bar
    -
    -

    And with attributes:

    -
    -
    -
    -
    -

    This was broken in 1.0.2b7:

    -
    -
    -foo -
    -
    diff --git a/oldtests/Original/Inline_HTML_Advanced.markdown b/oldtests/Original/Inline_HTML_Advanced.markdown deleted file mode 100644 index 3633f81..0000000 --- a/oldtests/Original/Inline_HTML_Advanced.markdown +++ /dev/null @@ -1,30 +0,0 @@ -Simple block on one line: - -
    foo
    - -And nested without indentation: - -
    -
    -
    -foo -
    -
    -
    -
    bar
    -
    - -And with attributes: - -
    -
    -
    -
    - -This was broken in 1.0.2b7: - -
    -
    -foo -
    -
    diff --git a/oldtests/Original/Inline_HTML_Simple.html b/oldtests/Original/Inline_HTML_Simple.html deleted file mode 100644 index 923a18c..0000000 --- a/oldtests/Original/Inline_HTML_Simple.html +++ /dev/null @@ -1,45 +0,0 @@ -

    Here's a simple block:

    -
    - foo -
    -

    This should be a code block, though:

    -
    <div>
    -    foo
    -</div>
    -
    -

    As should this:

    -
    <div>foo</div>
    -
    -

    Now, nested:

    -
    -
    -
    - foo -
    -
    -
    -

    This should just be an HTML comment:

    - -

    Multiline:

    - -

    Code block:

    -
    <!-- Comment -->
    -
    -

    Just plain comment, with trailing spaces on the line:

    - -

    Code:

    -
    <hr />
    -
    -

    Hr's:

    -
    -
    -
    -
    -
    -
    -
    -
    -
    diff --git a/oldtests/Original/Inline_HTML_Simple.markdown b/oldtests/Original/Inline_HTML_Simple.markdown deleted file mode 100644 index 14aa2dc..0000000 --- a/oldtests/Original/Inline_HTML_Simple.markdown +++ /dev/null @@ -1,69 +0,0 @@ -Here's a simple block: - -
    - foo -
    - -This should be a code block, though: - -
    - foo -
    - -As should this: - -
    foo
    - -Now, nested: - -
    -
    -
    - foo -
    -
    -
    - -This should just be an HTML comment: - - - -Multiline: - - - -Code block: - - - -Just plain comment, with trailing spaces on the line: - - - -Code: - -
    - -Hr's: - -
    - -
    - -
    - -
    - -
    - -
    - -
    - -
    - -
    - diff --git a/oldtests/Original/Inline_HTML_comments.html b/oldtests/Original/Inline_HTML_comments.html deleted file mode 100644 index ebc4818..0000000 --- a/oldtests/Original/Inline_HTML_comments.html +++ /dev/null @@ -1,8 +0,0 @@ -

    Paragraph one.

    - - -

    Paragraph two.

    - -

    The end.

    diff --git a/oldtests/Original/Inline_HTML_comments.markdown b/oldtests/Original/Inline_HTML_comments.markdown deleted file mode 100644 index 41d830d..0000000 --- a/oldtests/Original/Inline_HTML_comments.markdown +++ /dev/null @@ -1,13 +0,0 @@ -Paragraph one. - - - - - -Paragraph two. - - - -The end. diff --git a/oldtests/Original/Links_inline_style.html b/oldtests/Original/Links_inline_style.html deleted file mode 100644 index feb4637..0000000 --- a/oldtests/Original/Links_inline_style.html +++ /dev/null @@ -1,12 +0,0 @@ -

    Just a URL.

    -

    URL and title.

    -

    URL and title.

    -

    URL and title.

    -

    URL and title.

    -

    URL wrapped in angle brackets.

    -

    URL w/ angle brackets + title.

    -

    Empty.

    -

    With parens in the URL

    -

    (With outer parens and parens in url)

    -

    With parens in the URL

    -

    (With outer parens and parens in url)

    diff --git a/oldtests/Original/Links_inline_style.markdown b/oldtests/Original/Links_inline_style.markdown deleted file mode 100644 index aba9658..0000000 --- a/oldtests/Original/Links_inline_style.markdown +++ /dev/null @@ -1,24 +0,0 @@ -Just a [URL](/url/). - -[URL and title](/url/ "title"). - -[URL and title](/url/ "title preceded by two spaces"). - -[URL and title](/url/ "title preceded by a tab"). - -[URL and title](/url/ "title has spaces afterward" ). - -[URL wrapped in angle brackets](). - -[URL w/ angle brackets + title]( "Here's the title"). - -[Empty](). - -[With parens in the URL](http://en.wikipedia.org/wiki/WIMP_(computing)) - -(With outer parens and [parens in url](/foo(bar))) - - -[With parens in the URL](/foo(bar) "and a title") - -(With outer parens and [parens in url](/foo(bar) "and a title")) diff --git a/oldtests/Original/Links_reference_style.html b/oldtests/Original/Links_reference_style.html deleted file mode 100644 index 6d78b96..0000000 --- a/oldtests/Original/Links_reference_style.html +++ /dev/null @@ -1,28 +0,0 @@ -

    Foo bar.

    -

    Foo bar.

    -

    Foo bar.

    -

    With embedded [brackets].

    -

    Indented once.

    -

    Indented twice.

    -

    Indented thrice.

    -

    Indented [four][] times.

    -
    [four]: /url
    -
    -
    -

    this should work

    -

    So should this.

    -

    And this.

    -

    And this.

    -

    And this.

    -

    But not [that] [].

    -

    Nor [that][].

    -

    Nor [that].

    -

    [Something in brackets like this should work]

    -

    [Same with this.]

    -

    In this case, this points to something else.

    -

    Backslashing should suppress [this] and [this].

    -
    -

    Here's one where the link -breaks across lines.

    -

    Here's another where the link -breaks across lines, but with a line-ending space.

    diff --git a/oldtests/Original/Links_reference_style.markdown b/oldtests/Original/Links_reference_style.markdown deleted file mode 100644 index 341ec88..0000000 --- a/oldtests/Original/Links_reference_style.markdown +++ /dev/null @@ -1,71 +0,0 @@ -Foo [bar] [1]. - -Foo [bar][1]. - -Foo [bar] -[1]. - -[1]: /url/ "Title" - - -With [embedded [brackets]] [b]. - - -Indented [once][]. - -Indented [twice][]. - -Indented [thrice][]. - -Indented [four][] times. - - [once]: /url - - [twice]: /url - - [thrice]: /url - - [four]: /url - - -[b]: /url/ - -* * * - -[this] [this] should work - -So should [this][this]. - -And [this] []. - -And [this][]. - -And [this]. - -But not [that] []. - -Nor [that][]. - -Nor [that]. - -[Something in brackets like [this][] should work] - -[Same with [this].] - -In this case, [this](/somethingelse/) points to something else. - -Backslashing should suppress \[this] and [this\]. - -[this]: foo - - -* * * - -Here's one where the [link -breaks] across lines. - -Here's another where the [link -breaks] across lines, but with a line-ending space. - - -[link breaks]: /url/ diff --git a/oldtests/Original/Links_shortcut_references.html b/oldtests/Original/Links_shortcut_references.html deleted file mode 100644 index 8163ade..0000000 --- a/oldtests/Original/Links_shortcut_references.html +++ /dev/null @@ -1,6 +0,0 @@ -

    This is the simple case.

    -

    This one has a line -break.

    -

    This one has a line -break with a line-ending space.

    -

    this and the other

    diff --git a/oldtests/Original/Links_shortcut_references.markdown b/oldtests/Original/Links_shortcut_references.markdown deleted file mode 100644 index 8c44c98..0000000 --- a/oldtests/Original/Links_shortcut_references.markdown +++ /dev/null @@ -1,20 +0,0 @@ -This is the [simple case]. - -[simple case]: /simple - - - -This one has a [line -break]. - -This one has a [line -break] with a line-ending space. - -[line break]: /foo - - -[this] [that] and the [other] - -[this]: /this -[that]: /that -[other]: /other diff --git a/oldtests/Original/Literal_quotes_in_titles.html b/oldtests/Original/Literal_quotes_in_titles.html deleted file mode 100644 index 62e8641..0000000 --- a/oldtests/Original/Literal_quotes_in_titles.html +++ /dev/null @@ -1,2 +0,0 @@ -

    Foo bar.

    -

    Foo bar.

    diff --git a/oldtests/Original/Literal_quotes_in_titles.markdown b/oldtests/Original/Literal_quotes_in_titles.markdown deleted file mode 100644 index 29d0e42..0000000 --- a/oldtests/Original/Literal_quotes_in_titles.markdown +++ /dev/null @@ -1,7 +0,0 @@ -Foo [bar][]. - -Foo [bar](/url/ "Title with "quotes" inside"). - - - [bar]: /url/ "Title with "quotes" inside" - diff --git a/oldtests/Original/Markdown_Documentation_Basics.html b/oldtests/Original/Markdown_Documentation_Basics.html deleted file mode 100644 index 0dee67f..0000000 --- a/oldtests/Original/Markdown_Documentation_Basics.html +++ /dev/null @@ -1,242 +0,0 @@ -

    Markdown: Basics

    - -

    Getting the Gist of Markdown's Formatting Syntax

    -

    This page offers a brief overview of what it's like to use Markdown. -The syntax page provides complete, detailed documentation for -every feature, but Markdown should be very easy to pick up simply by -looking at a few examples of it in action. The examples on this page -are written in a before/after style, showing example syntax and the -HTML output produced by Markdown.

    -

    It's also helpful to simply try Markdown out; the Dingus is a -web application that allows you type your own Markdown-formatted text -and translate it to XHTML.

    -

    Note: This document is itself written using Markdown; you -can see the source for it by adding '.text' to the URL.

    -

    Paragraphs, Headers, Blockquotes

    -

    A paragraph is simply one or more consecutive lines of text, separated -by one or more blank lines. (A blank line is any line that looks like a -blank line -- a line containing nothing spaces or tabs is considered -blank.) Normal paragraphs should not be intended with spaces or tabs.

    -

    Markdown offers two styles of headers: Setext and atx. -Setext-style headers for <h1> and <h2> are created by -"underlining" with equal signs (=) and hyphens (-), respectively. -To create an atx-style header, you put 1-6 hash marks (#) at the -beginning of the line -- the number of hashes equals the resulting -HTML header level.

    -

    Blockquotes are indicated using email-style '>' angle brackets.

    -

    Markdown:

    -
    A First Level Header
    -====================
    -
    -A Second Level Header
    ----------------------
    -
    -Now is the time for all good men to come to
    -the aid of their country. This is just a
    -regular paragraph.
    -
    -The quick brown fox jumped over the lazy
    -dog's back.
    -
    -### Header 3
    -
    -> This is a blockquote.
    ->
    -> This is the second paragraph in the blockquote.
    ->
    -> ## This is an H2 in a blockquote
    -
    -

    Output:

    -
    <h1>A First Level Header</h1>
    -
    -<h2>A Second Level Header</h2>
    -
    -<p>Now is the time for all good men to come to
    -the aid of their country. This is just a
    -regular paragraph.</p>
    -
    -<p>The quick brown fox jumped over the lazy
    -dog's back.</p>
    -
    -<h3>Header 3</h3>
    -
    -<blockquote>
    -    <p>This is a blockquote.</p>
    -
    -    <p>This is the second paragraph in the blockquote.</p>
    -
    -    <h2>This is an H2 in a blockquote</h2>
    -</blockquote>
    -
    -

    Phrase Emphasis

    -

    Markdown uses asterisks and underscores to indicate spans of emphasis.

    -

    Markdown:

    -
    Some of these words *are emphasized*.
    -Some of these words _are emphasized also_.
    -
    -Use two asterisks for **strong emphasis**.
    -Or, if you prefer, __use two underscores instead__.
    -
    -

    Output:

    -
    <p>Some of these words <em>are emphasized</em>.
    -Some of these words <em>are emphasized also</em>.</p>
    -
    -<p>Use two asterisks for <strong>strong emphasis</strong>.
    -Or, if you prefer, <strong>use two underscores instead</strong>.</p>
    -
    -

    Lists

    -

    Unordered (bulleted) lists use asterisks, pluses, and hyphens (*, -+, and -) as list markers. These three markers are -interchangable; this:

    -
    *   Candy.
    -*   Gum.
    -*   Booze.
    -
    -

    this:

    -
    +   Candy.
    -+   Gum.
    -+   Booze.
    -
    -

    and this:

    -
    -   Candy.
    --   Gum.
    --   Booze.
    -
    -

    all produce the same output:

    -
    <ul>
    -<li>Candy.</li>
    -<li>Gum.</li>
    -<li>Booze.</li>
    -</ul>
    -
    -

    Ordered (numbered) lists use regular numbers, followed by periods, as -list markers:

    -
    1.  Red
    -2.  Green
    -3.  Blue
    -
    -

    Output:

    -
    <ol>
    -<li>Red</li>
    -<li>Green</li>
    -<li>Blue</li>
    -</ol>
    -
    -

    If you put blank lines between items, you'll get <p> tags for the -list item text. You can create multi-paragraph list items by indenting -the paragraphs by 4 spaces or 1 tab:

    -
    *   A list item.
    -
    -    With multiple paragraphs.
    -
    -*   Another item in the list.
    -
    -

    Output:

    -
    <ul>
    -<li><p>A list item.</p>
    -<p>With multiple paragraphs.</p></li>
    -<li><p>Another item in the list.</p></li>
    -</ul>
    -
    -

    Links

    -

    Markdown supports two styles for creating links: inline and -reference. With both styles, you use square brackets to delimit the -text you want to turn into a link.

    -

    Inline-style links use parentheses immediately after the link text. -For example:

    -
    This is an [example link](http://example.com/).
    -
    -

    Output:

    -
    <p>This is an <a href="http://example.com/">
    -example link</a>.</p>
    -
    -

    Optionally, you may include a title attribute in the parentheses:

    -
    This is an [example link](http://example.com/ "With a Title").
    -
    -

    Output:

    -
    <p>This is an <a href="http://example.com/" title="With a Title">
    -example link</a>.</p>
    -
    -

    Reference-style links allow you to refer to your links by names, which -you define elsewhere in your document:

    -
    I get 10 times more traffic from [Google][1] than from
    -[Yahoo][2] or [MSN][3].
    -
    -[1]: http://google.com/        "Google"
    -[2]: http://search.yahoo.com/  "Yahoo Search"
    -[3]: http://search.msn.com/    "MSN Search"
    -
    -

    Output:

    -
    <p>I get 10 times more traffic from <a href="http://google.com/"
    -title="Google">Google</a> than from <a href="http://search.yahoo.com/"
    -title="Yahoo Search">Yahoo</a> or <a href="http://search.msn.com/"
    -title="MSN Search">MSN</a>.</p>
    -
    -

    The title attribute is optional. Link names may contain letters, -numbers and spaces, but are not case sensitive:

    -
    I start my morning with a cup of coffee and
    -[The New York Times][NY Times].
    -
    -[ny times]: http://www.nytimes.com/
    -
    -

    Output:

    -
    <p>I start my morning with a cup of coffee and
    -<a href="http://www.nytimes.com/">The New York Times</a>.</p>
    -
    -

    Images

    -

    Image syntax is very much like link syntax.

    -

    Inline (titles are optional):

    -
    ![alt text](/path/to/img.jpg "Title")
    -
    -

    Reference-style:

    -
    ![alt text][id]
    -
    -[id]: /path/to/img.jpg "Title"
    -
    -

    Both of the above examples produce the same output:

    -
    <img src="/path/to/img.jpg" alt="alt text" title="Title" />
    -
    -

    Code

    -

    In a regular paragraph, you can create code span by wrapping text in -backtick quotes. Any ampersands (&) and angle brackets (< or ->) will automatically be translated into HTML entities. This makes -it easy to use Markdown to write about HTML example code:

    -
    I strongly recommend against using any `<blink>` tags.
    -
    -I wish SmartyPants used named entities like `&mdash;`
    -instead of decimal-encoded entites like `&#8212;`.
    -
    -

    Output:

    -
    <p>I strongly recommend against using any
    -<code>&lt;blink&gt;</code> tags.</p>
    -
    -<p>I wish SmartyPants used named entities like
    -<code>&amp;mdash;</code> instead of decimal-encoded
    -entites like <code>&amp;#8212;</code>.</p>
    -
    -

    To specify an entire block of pre-formatted code, indent every line of -the block by 4 spaces or 1 tab. Just like with code spans, &, <, -and > characters will be escaped automatically.

    -

    Markdown:

    -
    If you want your page to validate under XHTML 1.0 Strict,
    -you've got to put paragraph tags in your blockquotes:
    -
    -    <blockquote>
    -        <p>For example.</p>
    -    </blockquote>
    -
    -

    Output:

    -
    <p>If you want your page to validate under XHTML 1.0 Strict,
    -you've got to put paragraph tags in your blockquotes:</p>
    -
    -<pre><code>&lt;blockquote&gt;
    -    &lt;p&gt;For example.&lt;/p&gt;
    -&lt;/blockquote&gt;
    -</code></pre>
    -
    diff --git a/oldtests/Original/Markdown_Documentation_Basics.markdown b/oldtests/Original/Markdown_Documentation_Basics.markdown deleted file mode 100644 index 24eba65..0000000 --- a/oldtests/Original/Markdown_Documentation_Basics.markdown +++ /dev/null @@ -1,306 +0,0 @@ -Markdown: Basics -================ - - - - -Getting the Gist of Markdown's Formatting Syntax ------------------------------------------------- - -This page offers a brief overview of what it's like to use Markdown. -The [syntax page] [s] provides complete, detailed documentation for -every feature, but Markdown should be very easy to pick up simply by -looking at a few examples of it in action. The examples on this page -are written in a before/after style, showing example syntax and the -HTML output produced by Markdown. - -It's also helpful to simply try Markdown out; the [Dingus] [d] is a -web application that allows you type your own Markdown-formatted text -and translate it to XHTML. - -**Note:** This document is itself written using Markdown; you -can [see the source for it by adding '.text' to the URL] [src]. - - [s]: /projects/markdown/syntax "Markdown Syntax" - [d]: /projects/markdown/dingus "Markdown Dingus" - [src]: /projects/markdown/basics.text - - -## Paragraphs, Headers, Blockquotes ## - -A paragraph is simply one or more consecutive lines of text, separated -by one or more blank lines. (A blank line is any line that looks like a -blank line -- a line containing nothing spaces or tabs is considered -blank.) Normal paragraphs should not be intended with spaces or tabs. - -Markdown offers two styles of headers: *Setext* and *atx*. -Setext-style headers for `

    ` and `

    ` are created by -"underlining" with equal signs (`=`) and hyphens (`-`), respectively. -To create an atx-style header, you put 1-6 hash marks (`#`) at the -beginning of the line -- the number of hashes equals the resulting -HTML header level. - -Blockquotes are indicated using email-style '`>`' angle brackets. - -Markdown: - - A First Level Header - ==================== - - A Second Level Header - --------------------- - - Now is the time for all good men to come to - the aid of their country. This is just a - regular paragraph. - - The quick brown fox jumped over the lazy - dog's back. - - ### Header 3 - - > This is a blockquote. - > - > This is the second paragraph in the blockquote. - > - > ## This is an H2 in a blockquote - - -Output: - -

    A First Level Header

    - -

    A Second Level Header

    - -

    Now is the time for all good men to come to - the aid of their country. This is just a - regular paragraph.

    - -

    The quick brown fox jumped over the lazy - dog's back.

    - -

    Header 3

    - -
    -

    This is a blockquote.

    - -

    This is the second paragraph in the blockquote.

    - -

    This is an H2 in a blockquote

    -
    - - - -### Phrase Emphasis ### - -Markdown uses asterisks and underscores to indicate spans of emphasis. - -Markdown: - - Some of these words *are emphasized*. - Some of these words _are emphasized also_. - - Use two asterisks for **strong emphasis**. - Or, if you prefer, __use two underscores instead__. - -Output: - -

    Some of these words are emphasized. - Some of these words are emphasized also.

    - -

    Use two asterisks for strong emphasis. - Or, if you prefer, use two underscores instead.

    - - - -## Lists ## - -Unordered (bulleted) lists use asterisks, pluses, and hyphens (`*`, -`+`, and `-`) as list markers. These three markers are -interchangable; this: - - * Candy. - * Gum. - * Booze. - -this: - - + Candy. - + Gum. - + Booze. - -and this: - - - Candy. - - Gum. - - Booze. - -all produce the same output: - -
      -
    • Candy.
    • -
    • Gum.
    • -
    • Booze.
    • -
    - -Ordered (numbered) lists use regular numbers, followed by periods, as -list markers: - - 1. Red - 2. Green - 3. Blue - -Output: - -
      -
    1. Red
    2. -
    3. Green
    4. -
    5. Blue
    6. -
    - -If you put blank lines between items, you'll get `

    ` tags for the -list item text. You can create multi-paragraph list items by indenting -the paragraphs by 4 spaces or 1 tab: - - * A list item. - - With multiple paragraphs. - - * Another item in the list. - -Output: - -

      -
    • A list item.

      -

      With multiple paragraphs.

    • -
    • Another item in the list.

    • -
    - - - -### Links ### - -Markdown supports two styles for creating links: *inline* and -*reference*. With both styles, you use square brackets to delimit the -text you want to turn into a link. - -Inline-style links use parentheses immediately after the link text. -For example: - - This is an [example link](http://example.com/). - -Output: - -

    This is an - example link.

    - -Optionally, you may include a title attribute in the parentheses: - - This is an [example link](http://example.com/ "With a Title"). - -Output: - -

    This is an - example link.

    - -Reference-style links allow you to refer to your links by names, which -you define elsewhere in your document: - - I get 10 times more traffic from [Google][1] than from - [Yahoo][2] or [MSN][3]. - - [1]: http://google.com/ "Google" - [2]: http://search.yahoo.com/ "Yahoo Search" - [3]: http://search.msn.com/ "MSN Search" - -Output: - -

    I get 10 times more traffic from Google than from Yahoo or MSN.

    - -The title attribute is optional. Link names may contain letters, -numbers and spaces, but are *not* case sensitive: - - I start my morning with a cup of coffee and - [The New York Times][NY Times]. - - [ny times]: http://www.nytimes.com/ - -Output: - -

    I start my morning with a cup of coffee and - The New York Times.

    - - -### Images ### - -Image syntax is very much like link syntax. - -Inline (titles are optional): - - ![alt text](/path/to/img.jpg "Title") - -Reference-style: - - ![alt text][id] - - [id]: /path/to/img.jpg "Title" - -Both of the above examples produce the same output: - - alt text - - - -### Code ### - -In a regular paragraph, you can create code span by wrapping text in -backtick quotes. Any ampersands (`&`) and angle brackets (`<` or -`>`) will automatically be translated into HTML entities. This makes -it easy to use Markdown to write about HTML example code: - - I strongly recommend against using any `` tags. - - I wish SmartyPants used named entities like `—` - instead of decimal-encoded entites like `—`. - -Output: - -

    I strongly recommend against using any - <blink> tags.

    - -

    I wish SmartyPants used named entities like - &mdash; instead of decimal-encoded - entites like &#8212;.

    - - -To specify an entire block of pre-formatted code, indent every line of -the block by 4 spaces or 1 tab. Just like with code spans, `&`, `<`, -and `>` characters will be escaped automatically. - -Markdown: - - If you want your page to validate under XHTML 1.0 Strict, - you've got to put paragraph tags in your blockquotes: - -
    -

    For example.

    -
    - -Output: - -

    If you want your page to validate under XHTML 1.0 Strict, - you've got to put paragraph tags in your blockquotes:

    - -
    <blockquote>
    -        <p>For example.</p>
    -    </blockquote>
    -    
    diff --git a/oldtests/Original/Markdown_Documentation_Syntax.html b/oldtests/Original/Markdown_Documentation_Syntax.html deleted file mode 100644 index f379dcf..0000000 --- a/oldtests/Original/Markdown_Documentation_Syntax.html +++ /dev/null @@ -1,708 +0,0 @@ -

    Markdown: Syntax

    - - -

    Note: This document is itself written using Markdown; you -can see the source for it by adding '.text' to the URL.

    -
    -

    Overview

    -

    Philosophy

    -

    Markdown is intended to be as easy-to-read and easy-to-write as is feasible.

    -

    Readability, however, is emphasized above all else. A Markdown-formatted -document should be publishable as-is, as plain text, without looking -like it's been marked up with tags or formatting instructions. While -Markdown's syntax has been influenced by several existing text-to-HTML -filters -- including Setext, atx, Textile, reStructuredText, -Grutatext, and EtText -- the single biggest source of -inspiration for Markdown's syntax is the format of plain text email.

    -

    To this end, Markdown's syntax is comprised entirely of punctuation -characters, which punctuation characters have been carefully chosen so -as to look like what they mean. E.g., asterisks around a word actually -look like *emphasis*. Markdown lists look like, well, lists. Even -blockquotes look like quoted passages of text, assuming you've ever -used email.

    -

    Inline HTML

    -

    Markdown's syntax is intended for one purpose: to be used as a -format for writing for the web.

    -

    Markdown is not a replacement for HTML, or even close to it. Its -syntax is very small, corresponding only to a very small subset of -HTML tags. The idea is not to create a syntax that makes it easier -to insert HTML tags. In my opinion, HTML tags are already easy to -insert. The idea for Markdown is to make it easy to read, write, and -edit prose. HTML is a publishing format; Markdown is a writing -format. Thus, Markdown's formatting syntax only addresses issues that -can be conveyed in plain text.

    -

    For any markup that is not covered by Markdown's syntax, you simply -use HTML itself. There's no need to preface it or delimit it to -indicate that you're switching from Markdown to HTML; you just use -the tags.

    -

    The only restrictions are that block-level HTML elements -- e.g. <div>, -<table>, <pre>, <p>, etc. -- must be separated from surrounding -content by blank lines, and the start and end tags of the block should -not be indented with tabs or spaces. Markdown is smart enough not -to add extra (unwanted) <p> tags around HTML block-level tags.

    -

    For example, to add an HTML table to a Markdown article:

    -
    This is a regular paragraph.
    -
    -<table>
    -    <tr>
    -        <td>Foo</td>
    -    </tr>
    -</table>
    -
    -This is another regular paragraph.
    -
    -

    Note that Markdown formatting syntax is not processed within block-level -HTML tags. E.g., you can't use Markdown-style *emphasis* inside an -HTML block.

    -

    Span-level HTML tags -- e.g. <span>, <cite>, or <del> -- can be -used anywhere in a Markdown paragraph, list item, or header. If you -want, you can even use HTML tags instead of Markdown formatting; e.g. if -you'd prefer to use HTML <a> or <img> tags instead of Markdown's -link or image syntax, go right ahead.

    -

    Unlike block-level HTML tags, Markdown syntax is processed within -span-level tags.

    -

    Automatic Escaping for Special Characters

    -

    In HTML, there are two characters that demand special treatment: < -and &. Left angle brackets are used to start tags; ampersands are -used to denote HTML entities. If you want to use them as literal -characters, you must escape them as entities, e.g. &lt;, and -&amp;.

    -

    Ampersands in particular are bedeviling for web writers. If you want to -write about 'AT&T', you need to write 'AT&amp;T'. You even need to -escape ampersands within URLs. Thus, if you want to link to:

    -
    http://images.google.com/images?num=30&q=larry+bird
    -
    -

    you need to encode the URL as:

    -
    http://images.google.com/images?num=30&amp;q=larry+bird
    -
    -

    in your anchor tag href attribute. Needless to say, this is easy to -forget, and is probably the single most common source of HTML validation -errors in otherwise well-marked-up web sites.

    -

    Markdown allows you to use these characters naturally, taking care of -all the necessary escaping for you. If you use an ampersand as part of -an HTML entity, it remains unchanged; otherwise it will be translated -into &amp;.

    -

    So, if you want to include a copyright symbol in your article, you can write:

    -
    &copy;
    -
    -

    and Markdown will leave it alone. But if you write:

    -
    AT&T
    -
    -

    Markdown will translate it to:

    -
    AT&amp;T
    -
    -

    Similarly, because Markdown supports inline HTML, if you use -angle brackets as delimiters for HTML tags, Markdown will treat them as -such. But if you write:

    -
    4 < 5
    -
    -

    Markdown will translate it to:

    -
    4 &lt; 5
    -
    -

    However, inside Markdown code spans and blocks, angle brackets and -ampersands are always encoded automatically. This makes it easy to use -Markdown to write about HTML code. (As opposed to raw HTML, which is a -terrible format for writing about HTML syntax, because every single < -and & in your example code needs to be escaped.)

    -
    -

    Block Elements

    -

    Paragraphs and Line Breaks

    -

    A paragraph is simply one or more consecutive lines of text, separated -by one or more blank lines. (A blank line is any line that looks like a -blank line -- a line containing nothing but spaces or tabs is considered -blank.) Normal paragraphs should not be indented with spaces or tabs.

    -

    The implication of the "one or more consecutive lines of text" rule is -that Markdown supports "hard-wrapped" text paragraphs. This differs -significantly from most other text-to-HTML formatters (including Movable -Type's "Convert Line Breaks" option) which translate every line break -character in a paragraph into a <br /> tag.

    -

    When you do want to insert a <br /> break tag using Markdown, you -end a line with two or more spaces, then type return.

    -

    Yes, this takes a tad more effort to create a <br />, but a simplistic -"every line break is a <br />" rule wouldn't work for Markdown. -Markdown's email-style blockquoting and multi-paragraph list items -work best -- and look better -- when you format them with hard breaks.

    - -

    Markdown supports two styles of headers, Setext and atx.

    -

    Setext-style headers are "underlined" using equal signs (for first-level -headers) and dashes (for second-level headers). For example:

    -
    This is an H1
    -=============
    -
    -This is an H2
    --------------
    -
    -

    Any number of underlining ='s or -'s will work.

    -

    Atx-style headers use 1-6 hash characters at the start of the line, -corresponding to header levels 1-6. For example:

    -
    # This is an H1
    -
    -## This is an H2
    -
    -###### This is an H6
    -
    -

    Optionally, you may "close" atx-style headers. This is purely -cosmetic -- you can use this if you think it looks better. The -closing hashes don't even need to match the number of hashes -used to open the header. (The number of opening hashes -determines the header level.) :

    -
    # This is an H1 #
    -
    -## This is an H2 ##
    -
    -### This is an H3 ######
    -
    -

    Blockquotes

    -

    Markdown uses email-style > characters for blockquoting. If you're -familiar with quoting passages of text in an email message, then you -know how to create a blockquote in Markdown. It looks best if you hard -wrap the text and put a > before every line:

    -
    > This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
    -> consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
    -> Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.
    -> 
    -> Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
    -> id sem consectetuer libero luctus adipiscing.
    -
    -

    Markdown allows you to be lazy and only put the > before the first -line of a hard-wrapped paragraph:

    -
    > This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet,
    -consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus.
    -Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus.
    -
    -> Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse
    -id sem consectetuer libero luctus adipiscing.
    -
    -

    Blockquotes can be nested (i.e. a blockquote-in-a-blockquote) by -adding additional levels of >:

    -
    > This is the first level of quoting.
    ->
    -> > This is nested blockquote.
    ->
    -> Back to the first level.
    -
    -

    Blockquotes can contain other Markdown elements, including headers, lists, -and code blocks:

    -
    > ## This is a header.
    -> 
    -> 1.   This is the first list item.
    -> 2.   This is the second list item.
    -> 
    -> Here's some example code:
    -> 
    ->     return shell_exec("echo $input | $markdown_script");
    -
    -

    Any decent text editor should make email-style quoting easy. For -example, with BBEdit, you can make a selection and choose Increase -Quote Level from the Text menu.

    -

    Lists

    -

    Markdown supports ordered (numbered) and unordered (bulleted) lists.

    -

    Unordered lists use asterisks, pluses, and hyphens -- interchangeably --- as list markers:

    -
    *   Red
    -*   Green
    -*   Blue
    -
    -

    is equivalent to:

    -
    +   Red
    -+   Green
    -+   Blue
    -
    -

    and:

    -
    -   Red
    --   Green
    --   Blue
    -
    -

    Ordered lists use numbers followed by periods:

    -
    1.  Bird
    -2.  McHale
    -3.  Parish
    -
    -

    It's important to note that the actual numbers you use to mark the -list have no effect on the HTML output Markdown produces. The HTML -Markdown produces from the above list is:

    -
    <ol>
    -<li>Bird</li>
    -<li>McHale</li>
    -<li>Parish</li>
    -</ol>
    -
    -

    If you instead wrote the list in Markdown like this:

    -
    1.  Bird
    -1.  McHale
    -1.  Parish
    -
    -

    or even:

    -
    3. Bird
    -1. McHale
    -8. Parish
    -
    -

    you'd get the exact same HTML output. The point is, if you want to, -you can use ordinal numbers in your ordered Markdown lists, so that -the numbers in your source match the numbers in your published HTML. -But if you want to be lazy, you don't have to.

    -

    If you do use lazy list numbering, however, you should still start the -list with the number 1. At some point in the future, Markdown may support -starting ordered lists at an arbitrary number.

    -

    List markers typically start at the left margin, but may be indented by -up to three spaces. List markers must be followed by one or more spaces -or a tab.

    -

    To make lists look nice, you can wrap items with hanging indents:

    -
    *   Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
    -    Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,
    -    viverra nec, fringilla in, laoreet vitae, risus.
    -*   Donec sit amet nisl. Aliquam semper ipsum sit amet velit.
    -    Suspendisse id sem consectetuer libero luctus adipiscing.
    -
    -

    But if you want to be lazy, you don't have to:

    -
    *   Lorem ipsum dolor sit amet, consectetuer adipiscing elit.
    -Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi,
    -viverra nec, fringilla in, laoreet vitae, risus.
    -*   Donec sit amet nisl. Aliquam semper ipsum sit amet velit.
    -Suspendisse id sem consectetuer libero luctus adipiscing.
    -
    -

    If list items are separated by blank lines, Markdown will wrap the -items in <p> tags in the HTML output. For example, this input:

    -
    *   Bird
    -*   Magic
    -
    -

    will turn into:

    -
    <ul>
    -<li>Bird</li>
    -<li>Magic</li>
    -</ul>
    -
    -

    But this:

    -
    *   Bird
    -
    -*   Magic
    -
    -

    will turn into:

    -
    <ul>
    -<li><p>Bird</p></li>
    -<li><p>Magic</p></li>
    -</ul>
    -
    -

    List items may consist of multiple paragraphs. Each subsequent -paragraph in a list item must be indented by either 4 spaces -or one tab:

    -
    1.  This is a list item with two paragraphs. Lorem ipsum dolor
    -    sit amet, consectetuer adipiscing elit. Aliquam hendrerit
    -    mi posuere lectus.
    -
    -    Vestibulum enim wisi, viverra nec, fringilla in, laoreet
    -    vitae, risus. Donec sit amet nisl. Aliquam semper ipsum
    -    sit amet velit.
    -
    -2.  Suspendisse id sem consectetuer libero luctus adipiscing.
    -
    -

    It looks nice if you indent every line of the subsequent -paragraphs, but here again, Markdown will allow you to be -lazy:

    -
    *   This is a list item with two paragraphs.
    -
    -    This is the second paragraph in the list item. You're
    -only required to indent the first line. Lorem ipsum dolor
    -sit amet, consectetuer adipiscing elit.
    -
    -*   Another item in the same list.
    -
    -

    To put a blockquote within a list item, the blockquote's > -delimiters need to be indented:

    -
    *   A list item with a blockquote:
    -
    -    > This is a blockquote
    -    > inside a list item.
    -
    -

    To put a code block within a list item, the code block needs -to be indented twice -- 8 spaces or two tabs:

    -
    *   A list item with a code block:
    -
    -        <code goes here>
    -
    -

    It's worth noting that it's possible to trigger an ordered list by -accident, by writing something like this:

    -
    1986. What a great season.
    -
    -

    In other words, a number-period-space sequence at the beginning of a -line. To avoid this, you can backslash-escape the period:

    -
    1986\. What a great season.
    -
    -

    Code Blocks

    -

    Pre-formatted code blocks are used for writing about programming or -markup source code. Rather than forming normal paragraphs, the lines -of a code block are interpreted literally. Markdown wraps a code block -in both <pre> and <code> tags.

    -

    To produce a code block in Markdown, simply indent every line of the -block by at least 4 spaces or 1 tab. For example, given this input:

    -
    This is a normal paragraph:
    -
    -    This is a code block.
    -
    -

    Markdown will generate:

    -
    <p>This is a normal paragraph:</p>
    -
    -<pre><code>This is a code block.
    -</code></pre>
    -
    -

    One level of indentation -- 4 spaces or 1 tab -- is removed from each -line of the code block. For example, this:

    -
    Here is an example of AppleScript:
    -
    -    tell application "Foo"
    -        beep
    -    end tell
    -
    -

    will turn into:

    -
    <p>Here is an example of AppleScript:</p>
    -
    -<pre><code>tell application "Foo"
    -    beep
    -end tell
    -</code></pre>
    -
    -

    A code block continues until it reaches a line that is not indented -(or the end of the article).

    -

    Within a code block, ampersands (&) and angle brackets (< and >) -are automatically converted into HTML entities. This makes it very -easy to include example HTML source code using Markdown -- just paste -it and indent it, and Markdown will handle the hassle of encoding the -ampersands and angle brackets. For example, this:

    -
        <div class="footer">
    -        &copy; 2004 Foo Corporation
    -    </div>
    -
    -

    will turn into:

    -
    <pre><code>&lt;div class="footer"&gt;
    -    &amp;copy; 2004 Foo Corporation
    -&lt;/div&gt;
    -</code></pre>
    -
    -

    Regular Markdown syntax is not processed within code blocks. E.g., -asterisks are just literal asterisks within a code block. This means -it's also easy to use Markdown to write about Markdown's own syntax.

    -

    Horizontal Rules

    -

    You can produce a horizontal rule tag (<hr />) by placing three or -more hyphens, asterisks, or underscores on a line by themselves. If you -wish, you may use spaces between the hyphens or asterisks. Each of the -following lines will produce a horizontal rule:

    -
    * * *
    -
    -***
    -
    -*****
    -
    -- - -
    -
    ----------------------------------------
    -
    -_ _ _
    -
    -
    -

    Span Elements

    - -

    Markdown supports two styles of links: inline and reference.

    -

    In both styles, the link text is delimited by [square brackets].

    -

    To create an inline link, use a set of regular parentheses immediately -after the link text's closing square bracket. Inside the parentheses, -put the URL where you want the link to point, along with an optional -title for the link, surrounded in quotes. For example:

    -
    This is [an example](http://example.com/ "Title") inline link.
    -
    -[This link](http://example.net/) has no title attribute.
    -
    -

    Will produce:

    -
    <p>This is <a href="http://example.com/" title="Title">
    -an example</a> inline link.</p>
    -
    -<p><a href="http://example.net/">This link</a> has no
    -title attribute.</p>
    -
    -

    If you're referring to a local resource on the same server, you can -use relative paths:

    -
    See my [About](/about/) page for details.
    -
    -

    Reference-style links use a second set of square brackets, inside -which you place a label of your choosing to identify the link:

    -
    This is [an example][id] reference-style link.
    -
    -

    You can optionally use a space to separate the sets of brackets:

    -
    This is [an example] [id] reference-style link.
    -
    -

    Then, anywhere in the document, you define your link label like this, -on a line by itself:

    -
    [id]: http://example.com/  "Optional Title Here"
    -
    -

    That is:

    -
      -
    • Square brackets containing the link identifier (optionally -indented from the left margin using up to three spaces);
    • -
    • followed by a colon;
    • -
    • followed by one or more spaces (or tabs);
    • -
    • followed by the URL for the link;
    • -
    • optionally followed by a title attribute for the link, enclosed -in double or single quotes.
    • -
    -

    The link URL may, optionally, be surrounded by angle brackets:

    -
    [id]: <http://example.com/>  "Optional Title Here"
    -
    -

    You can put the title attribute on the next line and use extra spaces -or tabs for padding, which tends to look better with longer URLs:

    -
    [id]: http://example.com/longish/path/to/resource/here
    -    "Optional Title Here"
    -
    -

    Link definitions are only used for creating links during Markdown -processing, and are stripped from your document in the HTML output.

    -

    Link definition names may consist of letters, numbers, spaces, and punctuation -- but they are not case sensitive. E.g. these two links:

    -
    [link text][a]
    -[link text][A]
    -
    -

    are equivalent.

    -

    The implicit link name shortcut allows you to omit the name of the -link, in which case the link text itself is used as the name. -Just use an empty set of square brackets -- e.g., to link the word -"Google" to the google.com web site, you could simply write:

    -
    [Google][]
    -
    -

    And then define the link:

    -
    [Google]: http://google.com/
    -
    -

    Because link names may contain spaces, this shortcut even works for -multiple words in the link text:

    -
    Visit [Daring Fireball][] for more information.
    -
    -

    And then define the link:

    -
    [Daring Fireball]: http://daringfireball.net/
    -
    -

    Link definitions can be placed anywhere in your Markdown document. I -tend to put them immediately after each paragraph in which they're -used, but if you want, you can put them all at the end of your -document, sort of like footnotes.

    -

    Here's an example of reference links in action:

    -
    I get 10 times more traffic from [Google] [1] than from
    -[Yahoo] [2] or [MSN] [3].
    -
    -  [1]: http://google.com/        "Google"
    -  [2]: http://search.yahoo.com/  "Yahoo Search"
    -  [3]: http://search.msn.com/    "MSN Search"
    -
    -

    Using the implicit link name shortcut, you could instead write:

    -
    I get 10 times more traffic from [Google][] than from
    -[Yahoo][] or [MSN][].
    -
    -  [google]: http://google.com/        "Google"
    -  [yahoo]:  http://search.yahoo.com/  "Yahoo Search"
    -  [msn]:    http://search.msn.com/    "MSN Search"
    -
    -

    Both of the above examples will produce the following HTML output:

    -
    <p>I get 10 times more traffic from <a href="http://google.com/"
    -title="Google">Google</a> than from
    -<a href="http://search.yahoo.com/" title="Yahoo Search">Yahoo</a>
    -or <a href="http://search.msn.com/" title="MSN Search">MSN</a>.</p>
    -
    -

    For comparison, here is the same paragraph written using -Markdown's inline link style:

    -
    I get 10 times more traffic from [Google](http://google.com/ "Google")
    -than from [Yahoo](http://search.yahoo.com/ "Yahoo Search") or
    -[MSN](http://search.msn.com/ "MSN Search").
    -
    -

    The point of reference-style links is not that they're easier to -write. The point is that with reference-style links, your document -source is vastly more readable. Compare the above examples: using -reference-style links, the paragraph itself is only 81 characters -long; with inline-style links, it's 176 characters; and as raw HTML, -it's 234 characters. In the raw HTML, there's more markup than there -is text.

    -

    With Markdown's reference-style links, a source document much more -closely resembles the final output, as rendered in a browser. By -allowing you to move the markup-related metadata out of the paragraph, -you can add links without interrupting the narrative flow of your -prose.

    -

    Emphasis

    -

    Markdown treats asterisks (*) and underscores (_) as indicators of -emphasis. Text wrapped with one * or _ will be wrapped with an -HTML <em> tag; double *'s or _'s will be wrapped with an HTML -<strong> tag. E.g., this input:

    -
    *single asterisks*
    -
    -_single underscores_
    -
    -**double asterisks**
    -
    -__double underscores__
    -
    -

    will produce:

    -
    <em>single asterisks</em>
    -
    -<em>single underscores</em>
    -
    -<strong>double asterisks</strong>
    -
    -<strong>double underscores</strong>
    -
    -

    You can use whichever style you prefer; the lone restriction is that -the same character must be used to open and close an emphasis span.

    -

    Emphasis can be used in the middle of a word:

    -
    un*fucking*believable
    -
    -

    But if you surround an * or _ with spaces, it'll be treated as a -literal asterisk or underscore.

    -

    To produce a literal asterisk or underscore at a position where it -would otherwise be used as an emphasis delimiter, you can backslash -escape it:

    -
    \*this text is surrounded by literal asterisks\*
    -
    -

    Code

    -

    To indicate a span of code, wrap it with backtick quotes (`). -Unlike a pre-formatted code block, a code span indicates code within a -normal paragraph. For example:

    -
    Use the `printf()` function.
    -
    -

    will produce:

    -
    <p>Use the <code>printf()</code> function.</p>
    -
    -

    To include a literal backtick character within a code span, you can use -multiple backticks as the opening and closing delimiters:

    -
    ``There is a literal backtick (`) here.``
    -
    -

    which will produce this:

    -
    <p><code>There is a literal backtick (`) here.</code></p>
    -
    -

    The backtick delimiters surrounding a code span may include spaces -- -one after the opening, one before the closing. This allows you to place -literal backtick characters at the beginning or end of a code span:

    -
    A single backtick in a code span: `` ` ``
    -
    -A backtick-delimited string in a code span: `` `foo` ``
    -
    -

    will produce:

    -
    <p>A single backtick in a code span: <code>`</code></p>
    -
    -<p>A backtick-delimited string in a code span: <code>`foo`</code></p>
    -
    -

    With a code span, ampersands and angle brackets are encoded as HTML -entities automatically, which makes it easy to include example HTML -tags. Markdown will turn this:

    -
    Please don't use any `<blink>` tags.
    -
    -

    into:

    -
    <p>Please don't use any <code>&lt;blink&gt;</code> tags.</p>
    -
    -

    You can write this:

    -
    `&#8212;` is the decimal-encoded equivalent of `&mdash;`.
    -
    -

    to produce:

    -
    <p><code>&amp;#8212;</code> is the decimal-encoded
    -equivalent of <code>&amp;mdash;</code>.</p>
    -
    -

    Images

    -

    Admittedly, it's fairly difficult to devise a "natural" syntax for -placing images into a plain text document format.

    -

    Markdown uses an image syntax that is intended to resemble the syntax -for links, allowing for two styles: inline and reference.

    -

    Inline image syntax looks like this:

    -
    ![Alt text](/path/to/img.jpg)
    -
    -![Alt text](/path/to/img.jpg "Optional title")
    -
    -

    That is:

    -
      -
    • An exclamation mark: !;
    • -
    • followed by a set of square brackets, containing the alt -attribute text for the image;
    • -
    • followed by a set of parentheses, containing the URL or path to -the image, and an optional title attribute enclosed in double -or single quotes.
    • -
    -

    Reference-style image syntax looks like this:

    -
    ![Alt text][id]
    -
    -

    Where "id" is the name of a defined image reference. Image references -are defined using syntax identical to link references:

    -
    [id]: url/to/image  "Optional title attribute"
    -
    -

    As of this writing, Markdown has no syntax for specifying the -dimensions of an image; if this is important to you, you can simply -use regular HTML <img> tags.

    -
    -

    Miscellaneous

    - -

    Markdown supports a shortcut style for creating "automatic" links for URLs and email addresses: simply surround the URL or email address with angle brackets. What this means is that if you want to show the actual text of a URL or email address, and also have it be a clickable link, you can do this:

    -
    <http://example.com/>
    -
    -

    Markdown will turn this into:

    -
    <a href="http://example.com/">http://example.com/</a>
    -
    -

    Automatic links for email addresses work similarly, except that -Markdown will also perform a bit of randomized decimal and hex -entity-encoding to help obscure your address from address-harvesting -spambots. For example, Markdown will turn this:

    -
    <address@example.com>
    -
    -

    into something like this:

    -
    <a href="&#x6D;&#x61;i&#x6C;&#x74;&#x6F;:&#x61;&#x64;&#x64;&#x72;&#x65;
    -&#115;&#115;&#64;&#101;&#120;&#x61;&#109;&#x70;&#x6C;e&#x2E;&#99;&#111;
    -&#109;">&#x61;&#x64;&#x64;&#x72;&#x65;&#115;&#115;&#64;&#101;&#120;&#x61;
    -&#109;&#x70;&#x6C;e&#x2E;&#99;&#111;&#109;</a>
    -
    -

    which will render in a browser as a clickable link to "address@example.com".

    -

    (This sort of entity-encoding trick will indeed fool many, if not -most, address-harvesting bots, but it definitely won't fool all of -them. It's better than nothing, but an address published in this way -will probably eventually start receiving spam.)

    -

    Backslash Escapes

    -

    Markdown allows you to use backslash escapes to generate literal -characters which would otherwise have special meaning in Markdown's -formatting syntax. For example, if you wanted to surround a word with -literal asterisks (instead of an HTML <em> tag), you can add backslashes -before the asterisks, like this:

    -
    \*literal asterisks\*
    -
    -

    Markdown provides backslash escapes for the following characters:

    -
    \   backslash
    -`   backtick
    -*   asterisk
    -_   underscore
    -{}  curly braces
    -[]  square brackets
    -()  parentheses
    -#   hash mark
    -+   plus sign
    --   minus sign (hyphen)
    -.   dot
    -!   exclamation mark
    -
    diff --git a/oldtests/Original/Markdown_Documentation_Syntax.markdown b/oldtests/Original/Markdown_Documentation_Syntax.markdown deleted file mode 100644 index 57360a1..0000000 --- a/oldtests/Original/Markdown_Documentation_Syntax.markdown +++ /dev/null @@ -1,888 +0,0 @@ -Markdown: Syntax -================ - - - - -* [Overview](#overview) - * [Philosophy](#philosophy) - * [Inline HTML](#html) - * [Automatic Escaping for Special Characters](#autoescape) -* [Block Elements](#block) - * [Paragraphs and Line Breaks](#p) - * [Headers](#header) - * [Blockquotes](#blockquote) - * [Lists](#list) - * [Code Blocks](#precode) - * [Horizontal Rules](#hr) -* [Span Elements](#span) - * [Links](#link) - * [Emphasis](#em) - * [Code](#code) - * [Images](#img) -* [Miscellaneous](#misc) - * [Backslash Escapes](#backslash) - * [Automatic Links](#autolink) - - -**Note:** This document is itself written using Markdown; you -can [see the source for it by adding '.text' to the URL][src]. - - [src]: /projects/markdown/syntax.text - -* * * - -

    Overview

    - -

    Philosophy

    - -Markdown is intended to be as easy-to-read and easy-to-write as is feasible. - -Readability, however, is emphasized above all else. A Markdown-formatted -document should be publishable as-is, as plain text, without looking -like it's been marked up with tags or formatting instructions. While -Markdown's syntax has been influenced by several existing text-to-HTML -filters -- including [Setext] [1], [atx] [2], [Textile] [3], [reStructuredText] [4], -[Grutatext] [5], and [EtText] [6] -- the single biggest source of -inspiration for Markdown's syntax is the format of plain text email. - - [1]: http://docutils.sourceforge.net/mirror/setext.html - [2]: http://www.aaronsw.com/2002/atx/ - [3]: http://textism.com/tools/textile/ - [4]: http://docutils.sourceforge.net/rst.html - [5]: http://www.triptico.com/software/grutatxt.html - [6]: http://ettext.taint.org/doc/ - -To this end, Markdown's syntax is comprised entirely of punctuation -characters, which punctuation characters have been carefully chosen so -as to look like what they mean. E.g., asterisks around a word actually -look like \*emphasis\*. Markdown lists look like, well, lists. Even -blockquotes look like quoted passages of text, assuming you've ever -used email. - - - -

    Inline HTML

    - -Markdown's syntax is intended for one purpose: to be used as a -format for *writing* for the web. - -Markdown is not a replacement for HTML, or even close to it. Its -syntax is very small, corresponding only to a very small subset of -HTML tags. The idea is *not* to create a syntax that makes it easier -to insert HTML tags. In my opinion, HTML tags are already easy to -insert. The idea for Markdown is to make it easy to read, write, and -edit prose. HTML is a *publishing* format; Markdown is a *writing* -format. Thus, Markdown's formatting syntax only addresses issues that -can be conveyed in plain text. - -For any markup that is not covered by Markdown's syntax, you simply -use HTML itself. There's no need to preface it or delimit it to -indicate that you're switching from Markdown to HTML; you just use -the tags. - -The only restrictions are that block-level HTML elements -- e.g. `
    `, -``, `
    `, `

    `, etc. -- must be separated from surrounding -content by blank lines, and the start and end tags of the block should -not be indented with tabs or spaces. Markdown is smart enough not -to add extra (unwanted) `

    ` tags around HTML block-level tags. - -For example, to add an HTML table to a Markdown article: - - This is a regular paragraph. - -

    - - - -
    Foo
    - - This is another regular paragraph. - -Note that Markdown formatting syntax is not processed within block-level -HTML tags. E.g., you can't use Markdown-style `*emphasis*` inside an -HTML block. - -Span-level HTML tags -- e.g. ``, ``, or `` -- can be -used anywhere in a Markdown paragraph, list item, or header. If you -want, you can even use HTML tags instead of Markdown formatting; e.g. if -you'd prefer to use HTML `` or `` tags instead of Markdown's -link or image syntax, go right ahead. - -Unlike block-level HTML tags, Markdown syntax *is* processed within -span-level tags. - - -

    Automatic Escaping for Special Characters

    - -In HTML, there are two characters that demand special treatment: `<` -and `&`. Left angle brackets are used to start tags; ampersands are -used to denote HTML entities. If you want to use them as literal -characters, you must escape them as entities, e.g. `<`, and -`&`. - -Ampersands in particular are bedeviling for web writers. If you want to -write about 'AT&T', you need to write '`AT&T`'. You even need to -escape ampersands within URLs. Thus, if you want to link to: - - http://images.google.com/images?num=30&q=larry+bird - -you need to encode the URL as: - - http://images.google.com/images?num=30&q=larry+bird - -in your anchor tag `href` attribute. Needless to say, this is easy to -forget, and is probably the single most common source of HTML validation -errors in otherwise well-marked-up web sites. - -Markdown allows you to use these characters naturally, taking care of -all the necessary escaping for you. If you use an ampersand as part of -an HTML entity, it remains unchanged; otherwise it will be translated -into `&`. - -So, if you want to include a copyright symbol in your article, you can write: - - © - -and Markdown will leave it alone. But if you write: - - AT&T - -Markdown will translate it to: - - AT&T - -Similarly, because Markdown supports [inline HTML](#html), if you use -angle brackets as delimiters for HTML tags, Markdown will treat them as -such. But if you write: - - 4 < 5 - -Markdown will translate it to: - - 4 < 5 - -However, inside Markdown code spans and blocks, angle brackets and -ampersands are *always* encoded automatically. This makes it easy to use -Markdown to write about HTML code. (As opposed to raw HTML, which is a -terrible format for writing about HTML syntax, because every single `<` -and `&` in your example code needs to be escaped.) - - -* * * - - -

    Block Elements

    - - -

    Paragraphs and Line Breaks

    - -A paragraph is simply one or more consecutive lines of text, separated -by one or more blank lines. (A blank line is any line that looks like a -blank line -- a line containing nothing but spaces or tabs is considered -blank.) Normal paragraphs should not be intended with spaces or tabs. - -The implication of the "one or more consecutive lines of text" rule is -that Markdown supports "hard-wrapped" text paragraphs. This differs -significantly from most other text-to-HTML formatters (including Movable -Type's "Convert Line Breaks" option) which translate every line break -character in a paragraph into a `
    ` tag. - -When you *do* want to insert a `
    ` break tag using Markdown, you -end a line with two or more spaces, then type return. - -Yes, this takes a tad more effort to create a `
    `, but a simplistic -"every line break is a `
    `" rule wouldn't work for Markdown. -Markdown's email-style [blockquoting][bq] and multi-paragraph [list items][l] -work best -- and look better -- when you format them with hard breaks. - - [bq]: #blockquote - [l]: #list - - - - - -Markdown supports two styles of headers, [Setext] [1] and [atx] [2]. - -Setext-style headers are "underlined" using equal signs (for first-level -headers) and dashes (for second-level headers). For example: - - This is an H1 - ============= - - This is an H2 - ------------- - -Any number of underlining `=`'s or `-`'s will work. - -Atx-style headers use 1-6 hash characters at the start of the line, -corresponding to header levels 1-6. For example: - - # This is an H1 - - ## This is an H2 - - ###### This is an H6 - -Optionally, you may "close" atx-style headers. This is purely -cosmetic -- you can use this if you think it looks better. The -closing hashes don't even need to match the number of hashes -used to open the header. (The number of opening hashes -determines the header level.) : - - # This is an H1 # - - ## This is an H2 ## - - ### This is an H3 ###### - - -

    Blockquotes

    - -Markdown uses email-style `>` characters for blockquoting. If you're -familiar with quoting passages of text in an email message, then you -know how to create a blockquote in Markdown. It looks best if you hard -wrap the text and put a `>` before every line: - - > This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, - > consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus. - > Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus. - > - > Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse - > id sem consectetuer libero luctus adipiscing. - -Markdown allows you to be lazy and only put the `>` before the first -line of a hard-wrapped paragraph: - - > This is a blockquote with two paragraphs. Lorem ipsum dolor sit amet, - consectetuer adipiscing elit. Aliquam hendrerit mi posuere lectus. - Vestibulum enim wisi, viverra nec, fringilla in, laoreet vitae, risus. - - > Donec sit amet nisl. Aliquam semper ipsum sit amet velit. Suspendisse - id sem consectetuer libero luctus adipiscing. - -Blockquotes can be nested (i.e. a blockquote-in-a-blockquote) by -adding additional levels of `>`: - - > This is the first level of quoting. - > - > > This is nested blockquote. - > - > Back to the first level. - -Blockquotes can contain other Markdown elements, including headers, lists, -and code blocks: - - > ## This is a header. - > - > 1. This is the first list item. - > 2. This is the second list item. - > - > Here's some example code: - > - > return shell_exec("echo $input | $markdown_script"); - -Any decent text editor should make email-style quoting easy. For -example, with BBEdit, you can make a selection and choose Increase -Quote Level from the Text menu. - - -

    Lists

    - -Markdown supports ordered (numbered) and unordered (bulleted) lists. - -Unordered lists use asterisks, pluses, and hyphens -- interchangably --- as list markers: - - * Red - * Green - * Blue - -is equivalent to: - - + Red - + Green - + Blue - -and: - - - Red - - Green - - Blue - -Ordered lists use numbers followed by periods: - - 1. Bird - 2. McHale - 3. Parish - -It's important to note that the actual numbers you use to mark the -list have no effect on the HTML output Markdown produces. The HTML -Markdown produces from the above list is: - -
      -
    1. Bird
    2. -
    3. McHale
    4. -
    5. Parish
    6. -
    - -If you instead wrote the list in Markdown like this: - - 1. Bird - 1. McHale - 1. Parish - -or even: - - 3. Bird - 1. McHale - 8. Parish - -you'd get the exact same HTML output. The point is, if you want to, -you can use ordinal numbers in your ordered Markdown lists, so that -the numbers in your source match the numbers in your published HTML. -But if you want to be lazy, you don't have to. - -If you do use lazy list numbering, however, you should still start the -list with the number 1. At some point in the future, Markdown may support -starting ordered lists at an arbitrary number. - -List markers typically start at the left margin, but may be indented by -up to three spaces. List markers must be followed by one or more spaces -or a tab. - -To make lists look nice, you can wrap items with hanging indents: - - * Lorem ipsum dolor sit amet, consectetuer adipiscing elit. - Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi, - viverra nec, fringilla in, laoreet vitae, risus. - * Donec sit amet nisl. Aliquam semper ipsum sit amet velit. - Suspendisse id sem consectetuer libero luctus adipiscing. - -But if you want to be lazy, you don't have to: - - * Lorem ipsum dolor sit amet, consectetuer adipiscing elit. - Aliquam hendrerit mi posuere lectus. Vestibulum enim wisi, - viverra nec, fringilla in, laoreet vitae, risus. - * Donec sit amet nisl. Aliquam semper ipsum sit amet velit. - Suspendisse id sem consectetuer libero luctus adipiscing. - -If list items are separated by blank lines, Markdown will wrap the -items in `

    ` tags in the HTML output. For example, this input: - - * Bird - * Magic - -will turn into: - -

      -
    • Bird
    • -
    • Magic
    • -
    - -But this: - - * Bird - - * Magic - -will turn into: - -
      -
    • Bird

    • -
    • Magic

    • -
    - -List items may consist of multiple paragraphs. Each subsequent -paragraph in a list item must be intended by either 4 spaces -or one tab: - - 1. This is a list item with two paragraphs. Lorem ipsum dolor - sit amet, consectetuer adipiscing elit. Aliquam hendrerit - mi posuere lectus. - - Vestibulum enim wisi, viverra nec, fringilla in, laoreet - vitae, risus. Donec sit amet nisl. Aliquam semper ipsum - sit amet velit. - - 2. Suspendisse id sem consectetuer libero luctus adipiscing. - -It looks nice if you indent every line of the subsequent -paragraphs, but here again, Markdown will allow you to be -lazy: - - * This is a list item with two paragraphs. - - This is the second paragraph in the list item. You're - only required to indent the first line. Lorem ipsum dolor - sit amet, consectetuer adipiscing elit. - - * Another item in the same list. - -To put a blockquote within a list item, the blockquote's `>` -delimiters need to be indented: - - * A list item with a blockquote: - - > This is a blockquote - > inside a list item. - -To put a code block within a list item, the code block needs -to be indented *twice* -- 8 spaces or two tabs: - - * A list item with a code block: - - - - -It's worth noting that it's possible to trigger an ordered list by -accident, by writing something like this: - - 1986. What a great season. - -In other words, a *number-period-space* sequence at the beginning of a -line. To avoid this, you can backslash-escape the period: - - 1986\. What a great season. - - - -

    Code Blocks

    - -Pre-formatted code blocks are used for writing about programming or -markup source code. Rather than forming normal paragraphs, the lines -of a code block are interpreted literally. Markdown wraps a code block -in both `
    ` and `` tags.
    -
    -To produce a code block in Markdown, simply indent every line of the
    -block by at least 4 spaces or 1 tab. For example, given this input:
    -
    -    This is a normal paragraph:
    -
    -        This is a code block.
    -
    -Markdown will generate:
    -
    -    

    This is a normal paragraph:

    - -
    This is a code block.
    -    
    - -One level of indentation -- 4 spaces or 1 tab -- is removed from each -line of the code block. For example, this: - - Here is an example of AppleScript: - - tell application "Foo" - beep - end tell - -will turn into: - -

    Here is an example of AppleScript:

    - -
    tell application "Foo"
    -        beep
    -    end tell
    -    
    - -A code block continues until it reaches a line that is not indented -(or the end of the article). - -Within a code block, ampersands (`&`) and angle brackets (`<` and `>`) -are automatically converted into HTML entities. This makes it very -easy to include example HTML source code using Markdown -- just paste -it and indent it, and Markdown will handle the hassle of encoding the -ampersands and angle brackets. For example, this: - - - -will turn into: - -
    <div class="footer">
    -        &copy; 2004 Foo Corporation
    -    </div>
    -    
    - -Regular Markdown syntax is not processed within code blocks. E.g., -asterisks are just literal asterisks within a code block. This means -it's also easy to use Markdown to write about Markdown's own syntax. - - - -

    Horizontal Rules

    - -You can produce a horizontal rule tag (`
    `) by placing three or -more hyphens, asterisks, or underscores on a line by themselves. If you -wish, you may use spaces between the hyphens or asterisks. Each of the -following lines will produce a horizontal rule: - - * * * - - *** - - ***** - - - - - - - --------------------------------------- - - _ _ _ - - -* * * - -

    Span Elements

    - - - -Markdown supports two style of links: *inline* and *reference*. - -In both styles, the link text is delimited by [square brackets]. - -To create an inline link, use a set of regular parentheses immediately -after the link text's closing square bracket. Inside the parentheses, -put the URL where you want the link to point, along with an *optional* -title for the link, surrounded in quotes. For example: - - This is [an example](http://example.com/ "Title") inline link. - - [This link](http://example.net/) has no title attribute. - -Will produce: - -

    This is - an example inline link.

    - -

    This link has no - title attribute.

    - -If you're referring to a local resource on the same server, you can -use relative paths: - - See my [About](/about/) page for details. - -Reference-style links use a second set of square brackets, inside -which you place a label of your choosing to identify the link: - - This is [an example][id] reference-style link. - -You can optionally use a space to separate the sets of brackets: - - This is [an example] [id] reference-style link. - -Then, anywhere in the document, you define your link label like this, -on a line by itself: - - [id]: http://example.com/ "Optional Title Here" - -That is: - -* Square brackets containing the link identifier (optionally - indented from the left margin using up to three spaces); -* followed by a colon; -* followed by one or more spaces (or tabs); -* followed by the URL for the link; -* optionally followed by a title attribute for the link, enclosed - in double or single quotes. - -The link URL may, optionally, be surrounded by angle brackets: - - [id]: "Optional Title Here" - -You can put the title attribute on the next line and use extra spaces -or tabs for padding, which tends to look better with longer URLs: - - [id]: http://example.com/longish/path/to/resource/here - "Optional Title Here" - -Link definitions are only used for creating links during Markdown -processing, and are stripped from your document in the HTML output. - -Link definition names may constist of letters, numbers, spaces, and punctuation -- but they are *not* case sensitive. E.g. these two links: - - [link text][a] - [link text][A] - -are equivalent. - -The *implicit link name* shortcut allows you to omit the name of the -link, in which case the link text itself is used as the name. 
-Just use an empty set of square brackets -- e.g., to link the word -"Google" to the google.com web site, you could simply write: - - [Google][] - -And then define the link: - - [Google]: http://google.com/ - -Because link names may contain spaces, this shortcut even works for -multiple words in the link text: - - Visit [Daring Fireball][] for more information. - -And then define the link: - - [Daring Fireball]: http://daringfireball.net/ - -Link definitions can be placed anywhere in your Markdown document. I -tend to put them immediately after each paragraph in which they're -used, but if you want, you can put them all at the end of your -document, sort of like footnotes. - -Here's an example of reference links in action: - - I get 10 times more traffic from [Google] [1] than from - [Yahoo] [2] or [MSN] [3]. - - [1]: http://google.com/ "Google" - [2]: http://search.yahoo.com/ "Yahoo Search" - [3]: http://search.msn.com/ "MSN Search" - -Using the implicit link name shortcut, you could instead write: - - I get 10 times more traffic from [Google][] than from - [Yahoo][] or [MSN][]. - - [google]: http://google.com/ "Google" - [yahoo]: http://search.yahoo.com/ "Yahoo Search" - [msn]: http://search.msn.com/ "MSN Search" - -Both of the above examples will produce the following HTML output: - -

    I get 10 times more traffic from Google than from - Yahoo - or MSN.

    - -For comparison, here is the same paragraph written using -Markdown's inline link style: - - I get 10 times more traffic from [Google](http://google.com/ "Google") - than from [Yahoo](http://search.yahoo.com/ "Yahoo Search") or - [MSN](http://search.msn.com/ "MSN Search"). - -The point of reference-style links is not that they're easier to -write. The point is that with reference-style links, your document -source is vastly more readable. Compare the above examples: using -reference-style links, the paragraph itself is only 81 characters -long; with inline-style links, it's 176 characters; and as raw HTML, -it's 234 characters. In the raw HTML, there's more markup than there -is text. - -With Markdown's reference-style links, a source document much more -closely resembles the final output, as rendered in a browser. By -allowing you to move the markup-related metadata out of the paragraph, -you can add links without interrupting the narrative flow of your -prose. - - -

    Emphasis

    - -Markdown treats asterisks (`*`) and underscores (`_`) as indicators of -emphasis. Text wrapped with one `*` or `_` will be wrapped with an -HTML `` tag; double `*`'s or `_`'s will be wrapped with an HTML -`` tag. E.g., this input: - - *single asterisks* - - _single underscores_ - - **double asterisks** - - __double underscores__ - -will produce: - - single asterisks - - single underscores - - double asterisks - - double underscores - -You can use whichever style you prefer; the lone restriction is that -the same character must be used to open and close an emphasis span. - -Emphasis can be used in the middle of a word: - - un*fucking*believable - -But if you surround an `*` or `_` with spaces, it'll be treated as a -literal asterisk or underscore. - -To produce a literal asterisk or underscore at a position where it -would otherwise be used as an emphasis delimiter, you can backslash -escape it: - - \*this text is surrounded by literal asterisks\* - - - -

    Code

    - -To indicate a span of code, wrap it with backtick quotes (`` ` ``). -Unlike a pre-formatted code block, a code span indicates code within a -normal paragraph. For example: - - Use the `printf()` function. - -will produce: - -

    Use the printf() function.

    - -To include a literal backtick character within a code span, you can use -multiple backticks as the opening and closing delimiters: - - ``There is a literal backtick (`) here.`` - -which will produce this: - -

    There is a literal backtick (`) here.

    - -The backtick delimiters surrounding a code span may include spaces -- -one after the opening, one before the closing. This allows you to place -literal backtick characters at the beginning or end of a code span: - - A single backtick in a code span: `` ` `` - - A backtick-delimited string in a code span: `` `foo` `` - -will produce: - -

    A single backtick in a code span: `

    - -

    A backtick-delimited string in a code span: `foo`

    - -With a code span, ampersands and angle brackets are encoded as HTML -entities automatically, which makes it easy to include example HTML -tags. Markdown will turn this: - - Please don't use any `` tags. - -into: - -

    Please don't use any <blink> tags.

    - -You can write this: - - `—` is the decimal-encoded equivalent of `—`. - -to produce: - -

    &#8212; is the decimal-encoded - equivalent of &mdash;.

    - - - -

    Images

    - -Admittedly, it's fairly difficult to devise a "natural" syntax for -placing images into a plain text document format. - -Markdown uses an image syntax that is intended to resemble the syntax -for links, allowing for two styles: *inline* and *reference*. - -Inline image syntax looks like this: - - ![Alt text](/path/to/img.jpg) - - ![Alt text](/path/to/img.jpg "Optional title") - -That is: - -* An exclamation mark: `!`; -* followed by a set of square brackets, containing the `alt` - attribute text for the image; -* followed by a set of parentheses, containing the URL or path to - the image, and an optional `title` attribute enclosed in double - or single quotes. - -Reference-style image syntax looks like this: - - ![Alt text][id] - -Where "id" is the name of a defined image reference. Image references -are defined using syntax identical to link references: - - [id]: url/to/image "Optional title attribute" - -As of this writing, Markdown has no syntax for specifying the -dimensions of an image; if this is important to you, you can simply -use regular HTML `` tags. - - -* * * - - -

    Miscellaneous

    - - - -Markdown supports a shortcut style for creating "automatic" links for URLs and email addresses: simply surround the URL or email address with angle brackets. What this means is that if you want to show the actual text of a URL or email address, and also have it be a clickable link, you can do this: - - - -Markdown will turn this into: - - http://example.com/ - -Automatic links for email addresses work similarly, except that -Markdown will also perform a bit of randomized decimal and hex -entity-encoding to help obscure your address from address-harvesting -spambots. For example, Markdown will turn this: - - - -into something like this: - - address@exa - mple.com - -which will render in a browser as a clickable link to "address@example.com". - -(This sort of entity-encoding trick will indeed fool many, if not -most, address-harvesting bots, but it definitely won't fool all of -them. It's better than nothing, but an address published in this way -will probably eventually start receiving spam.) - - - -

    Backslash Escapes

    - -Markdown allows you to use backslash escapes to generate literal -characters which would otherwise have special meaning in Markdown's -formatting syntax. For example, if you wanted to surround a word with -literal asterisks (instead of an HTML `` tag), you can backslashes -before the asterisks, like this: - - \*literal asterisks\* - -Markdown provides backslash escapes for the following characters: - - \ backslash - ` backtick - * asterisk - _ underscore - {} curly braces - [] square brackets - () parentheses - # hash mark - + plus sign - - minus sign (hyphen) - . dot - ! exclamation mark - diff --git a/oldtests/Original/Nested_blockquotes.html b/oldtests/Original/Nested_blockquotes.html deleted file mode 100644 index 02efc59..0000000 --- a/oldtests/Original/Nested_blockquotes.html +++ /dev/null @@ -1,7 +0,0 @@ -
    -

    foo

    -
    -

    bar

    -
    -

    foo

    -
    diff --git a/oldtests/Original/Nested_blockquotes.markdown b/oldtests/Original/Nested_blockquotes.markdown deleted file mode 100644 index ed3c624..0000000 --- a/oldtests/Original/Nested_blockquotes.markdown +++ /dev/null @@ -1,5 +0,0 @@ -> foo -> -> > bar -> -> foo diff --git a/oldtests/Original/Ordered_and_unordered_lists.html b/oldtests/Original/Ordered_and_unordered_lists.html deleted file mode 100644 index 78d752e..0000000 --- a/oldtests/Original/Ordered_and_unordered_lists.html +++ /dev/null @@ -1,112 +0,0 @@ -

    Unordered

    -

    Asterisks tight:

    -
      -
    • asterisk 1
    • -
    • asterisk 2
    • -
    • asterisk 3
    • -
    -

    Asterisks loose:

    -
      -
    • asterisk 1

    • -
    • asterisk 2

    • -
    • asterisk 3

    • -
    -
    -

    Pluses tight:

    -
      -
    • Plus 1
    • -
    • Plus 2
    • -
    • Plus 3
    • -
    -

    Pluses loose:

    -
      -
    • Plus 1

    • -
    • Plus 2

    • -
    • Plus 3

    • -
    -
    -

    Minuses tight:

    -
      -
    • Minus 1
    • -
    • Minus 2
    • -
    • Minus 3
    • -
    -

    Minuses loose:

    -
      -
    • Minus 1

    • -
    • Minus 2

    • -
    • Minus 3

    • -
    -

    Ordered

    -

    Tight:

    -
      -
    1. First
    2. -
    3. Second
    4. -
    5. Third
    6. -
    -

    and:

    -
      -
    1. One
    2. -
    3. Two
    4. -
    5. Three
    6. -
    -

    Loose using tabs:

    -
      -
    1. First

    2. -
    3. Second

    4. -
    5. Third

    6. -
    -

    and using spaces:

    -
      -
    1. One

    2. -
    3. Two

    4. -
    5. Three

    6. -
    -

    Multiple paragraphs:

    -
      -
    1. Item 1, graf one.

      -

      Item 2. graf two. The quick brown fox jumped over the lazy dog's -back.

    2. -
    3. Item 2.

    4. -
    5. Item 3.

    6. -
    -

    Nested

    -
      -
    • Tab -
        -
      • Tab -
          -
        • Tab
        • -
      • -
    • -
    -

    Here's another:

    -
      -
    1. First
    2. -
    3. Second: -
        -
      • Fee
      • -
      • Fie
      • -
      • Foe
      • -
    4. -
    5. Third
    6. -
    -

    Same thing but with paragraphs:

    -
      -
    1. First

    2. -
    3. Second:

      -
        -
      • Fee
      • -
      • Fie
      • -
      • Foe
      • -
    4. -
    5. Third

    6. -
    -

    This was an error in Markdown 1.0.1:

    -
      -
    • this

      -
        -
      • sub
      • -
      -

      that

    • -
    diff --git a/oldtests/Original/Ordered_and_unordered_lists.markdown b/oldtests/Original/Ordered_and_unordered_lists.markdown deleted file mode 100644 index 7f3b497..0000000 --- a/oldtests/Original/Ordered_and_unordered_lists.markdown +++ /dev/null @@ -1,131 +0,0 @@ -## Unordered - -Asterisks tight: - -* asterisk 1 -* asterisk 2 -* asterisk 3 - - -Asterisks loose: - -* asterisk 1 - -* asterisk 2 - -* asterisk 3 - -* * * - -Pluses tight: - -+ Plus 1 -+ Plus 2 -+ Plus 3 - - -Pluses loose: - -+ Plus 1 - -+ Plus 2 - -+ Plus 3 - -* * * - - -Minuses tight: - -- Minus 1 -- Minus 2 -- Minus 3 - - -Minuses loose: - -- Minus 1 - -- Minus 2 - -- Minus 3 - - -## Ordered - -Tight: - -1. First -2. Second -3. Third - -and: - -1. One -2. Two -3. Three - - -Loose using tabs: - -1. First - -2. Second - -3. Third - -and using spaces: - -1. One - -2. Two - -3. Three - -Multiple paragraphs: - -1. Item 1, graf one. - - Item 2. graf two. The quick brown fox jumped over the lazy dog's - back. - -2. Item 2. - -3. Item 3. - - - -## Nested - -* Tab - * Tab - * Tab - -Here's another: - -1. First -2. Second: - * Fee - * Fie - * Foe -3. Third - -Same thing but with paragraphs: - -1. First - -2. Second: - * Fee - * Fie - * Foe - -3. Third - - -This was an error in Markdown 1.0.1: - -* this - - * sub - - that diff --git a/oldtests/Original/README b/oldtests/Original/README deleted file mode 100644 index 5143258..0000000 --- a/oldtests/Original/README +++ /dev/null @@ -1,15 +0,0 @@ -These are from John Gruber's original markdown test suite, via -Michel Fortin's mdtest. - -The html files have been modified slightly in ways that do not affect the -semantics. For example, entities are used for quotes in text, and -blank lines are omitted between block-level tags. - -Trailing blank spaces are removed from lines in raw HTML blocks. - -The one (insignificant) semantic change is switching the order -of emph and strong tags in the output for ***strong and emph***. 
- -We have removed Hard-wrapped_paragraphs_with_list-like_lines tests, -because the new implementation no longer requires a blank line -before a list. diff --git a/oldtests/Original/Strong_and_em_together.html b/oldtests/Original/Strong_and_em_together.html deleted file mode 100644 index 2629594..0000000 --- a/oldtests/Original/Strong_and_em_together.html +++ /dev/null @@ -1,4 +0,0 @@ -

    This is strong and em.

    -

    So is this word.

    -

    This is strong and em.

    -

    So is this word.

    diff --git a/oldtests/Original/Strong_and_em_together.markdown b/oldtests/Original/Strong_and_em_together.markdown deleted file mode 100644 index 95ee690..0000000 --- a/oldtests/Original/Strong_and_em_together.markdown +++ /dev/null @@ -1,7 +0,0 @@ -***This is strong and em.*** - -So is ***this*** word. - -___This is strong and em.___ - -So is ___this___ word. diff --git a/oldtests/Original/Tabs.html b/oldtests/Original/Tabs.html deleted file mode 100644 index 5389bdf..0000000 --- a/oldtests/Original/Tabs.html +++ /dev/null @@ -1,19 +0,0 @@ -
      -
    • this is a list item -indented with tabs

    • -
    • this is a list item -indented with spaces

    • -
    -

    Code:

    -
    this code block is indented by one tab
    -
    -

    And:

    -
        this code block is indented by two tabs
    -
    -

    And:

    -
    +   this is an example list item
    -    indented with tabs
    -
    -+   this is an example list item
    -    indented with spaces
    -
    diff --git a/oldtests/Original/Tabs.markdown b/oldtests/Original/Tabs.markdown deleted file mode 100644 index 589d113..0000000 --- a/oldtests/Original/Tabs.markdown +++ /dev/null @@ -1,21 +0,0 @@ -+ this is a list item - indented with tabs - -+ this is a list item - indented with spaces - -Code: - - this code block is indented by one tab - -And: - - this code block is indented by two tabs - -And: - - + this is an example list item - indented with tabs - - + this is an example list item - indented with spaces diff --git a/oldtests/Original/Tidyness.html b/oldtests/Original/Tidyness.html deleted file mode 100644 index f2a8ce7..0000000 --- a/oldtests/Original/Tidyness.html +++ /dev/null @@ -1,8 +0,0 @@ -
    -

    A list within a blockquote:

    -
      -
    • asterisk 1
    • -
    • asterisk 2
    • -
    • asterisk 3
    • -
    -
    diff --git a/oldtests/Original/Tidyness.markdown b/oldtests/Original/Tidyness.markdown deleted file mode 100644 index 5f18b8d..0000000 --- a/oldtests/Original/Tidyness.markdown +++ /dev/null @@ -1,5 +0,0 @@ -> A list within a blockquote: -> -> * asterisk 1 -> * asterisk 2 -> * asterisk 3 diff --git a/oldtests/Tabs/TabConversionUnicode.html b/oldtests/Tabs/TabConversionUnicode.html deleted file mode 100644 index f596f6a..0000000 --- a/oldtests/Tabs/TabConversionUnicode.html +++ /dev/null @@ -1 +0,0 @@ -

    То лпой is a Russian word with a tab inside.

    diff --git a/oldtests/Tabs/TabConversionUnicode.markdown b/oldtests/Tabs/TabConversionUnicode.markdown deleted file mode 100644 index 0bd7b52..0000000 --- a/oldtests/Tabs/TabConversionUnicode.markdown +++ /dev/null @@ -1 +0,0 @@ -`То лпой` is a Russian word with a tab inside. -- cgit v1.2.3