author     John MacFarlane <jgm@berkeley.edu>        2015-01-24 21:35:03 -0800
committer  John MacFarlane <jgm@berkeley.edu>        2015-01-24 21:39:07 -0800
commit     829b089c80895d9a78938c5bc7747aea1cd48eb6  (patch)
tree       53bd534741a90c547c5d87039efa5ee625da8081  /test
parent     5ef31853d5161d4b5a2dfc0df94e6eaaeb3215d0  (diff)
Removed implementation-specific material from the repository.
The C and JS implementations are being split off into
different repositories.
This repository will just have the spec itself.
Diffstat (limited to 'test')
-rw-r--r--  test/CMakeLists.txt         |  49
-rw-r--r--  test/cmark.py               |  40
-rw-r--r--  test/normalize.py           | 179
-rw-r--r--  test/pathological_tests.py  |  63
-rwxr-xr-x  test/spec_tests.py          | 139
5 files changed, 0 insertions, 470 deletions
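For orientation (this note is not part of the commit): the five deleted files made up cmark's Python test harness. cmark.py wraps the renderer, either by spawning a cmark executable or by loading libcmark through ctypes; normalize.py normalizes HTML so that insignificant whitespace and attribute ordering do not cause spurious failures; spec_tests.py runs the examples embedded in spec.txt; pathological_tests.py stress-tests deeply nested input; and CMakeLists.txt wires everything into CTest. A minimal sketch of how the pieces were combined, assuming the deleted test/ modules are importable and a shared libcmark has been built (cmark.py defaults to looking under build/src/):

    # Sketch only; file layout and build location are assumptions, not part of the commit.
    from cmark import CMark
    from normalize import normalize_html

    cmark = CMark(library_dir="build/src")            # or CMark(prog="./cmark")
    rc, html, err = cmark.to_html("Hello *world*\n")  # [return code, HTML, stderr]
    # spec_tests.py judged each spec example the same way (unless --no-normalize):
    passed = rc == 0 and normalize_html(html) == normalize_html("<p>Hello <em>world</em></p>\n")
    print("passed" if passed else "failed")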
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
deleted file mode 100644
index 11a27c6..0000000
--- a/test/CMakeLists.txt
+++ /dev/null
@@ -1,49 +0,0 @@
-# To get verbose output: cmake --build build --target "test" -- ARGS='-V'
-
-# By default, we run the spec tests only if python3 is available.
-# To require the spec tests, compile with -DSPEC_TESTS=1
-
-if (SPEC_TESTS)
-  find_package(PythonInterp 3 REQUIRED)
-else(SPEC_TESTS)
-  find_package(PythonInterp 3)
-endif(SPEC_TESTS)
-
-add_test(NAME api_test COMMAND api_test)
-
-if (WIN32)
-  file(TO_NATIVE_PATH ${CMAKE_BINARY_DIR}/src WIN_DLL_DIR)
-  set_tests_properties(api_test PROPERTIES
-    ENVIRONMENT "PATH=${WIN_DLL_DIR};$ENV{PATH}"
-  )
-endif(WIN32)
-
-IF (PYTHONINTERP_FOUND)
-
-  add_test(html_normalization
-    ${PYTHON_EXECUTABLE} "-m" "doctest"
-    "${CMAKE_CURRENT_SOURCE_DIR}/normalize.py"
-  )
-
-  add_test(spectest_library
-    ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec"
-    "${CMAKE_SOURCE_DIR}/spec.txt" "--library-dir" "${CMAKE_BINARY_DIR}/src"
-  )
-
-  add_test(pathological_tests_library
-    ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/pathological_tests.py"
-    "--library-dir" "${CMAKE_BINARY_DIR}/src"
-  )
-
-  add_test(spectest_executable
-    ${PYTHON_EXECUTABLE} "${CMAKE_CURRENT_SOURCE_DIR}/spec_tests.py" "--no-normalize" "--spec" "${CMAKE_SOURCE_DIR}/spec.txt" "--program" "${CMAKE_BINARY_DIR}/src/cmark"
-  )
-
-ELSE(PYTHONINTERP_FOUND)
-
-  message("\n*** A python 3 interpreter is required to run the spec tests.\n")
-  add_test(skipping_spectests
-    echo "Skipping spec tests, because no python 3 interpreter is available.")
-
-ENDIF(PYTHONINTERP_FOUND)
-
diff --git a/test/cmark.py b/test/cmark.py
deleted file mode 100644
index 253e3a8..0000000
--- a/test/cmark.py
+++ /dev/null
@@ -1,40 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-from ctypes import CDLL, c_char_p, c_long
-from subprocess import *
-import platform
-
-def pipe_through_prog(prog, text):
-    p1 = Popen(prog.split(), stdout=PIPE, stdin=PIPE, stderr=PIPE)
-    [result, err] = p1.communicate(input=text.encode('utf-8'))
-    return [p1.returncode, result.decode('utf-8'), err]
-
-def use_library(lib, text):
-    textbytes = text.encode('utf-8')
-    textlen = len(textbytes)
-    return [0, lib(textbytes, textlen).decode('utf-8'), '']
-
-class CMark:
-    def __init__(self, prog=None, library_dir=None):
-        self.prog = prog
-        if prog:
-            self.to_html = lambda x: pipe_through_prog(prog, x)
-        else:
-            sysname = platform.system()
-            libname = "libcmark"
-            if sysname == 'Darwin':
-                libname += ".dylib"
-            elif sysname == 'Windows':
-                libname = "cmark.dll"
-            else:
-                libname += ".so"
-            if library_dir:
-                libpath = library_dir + "/" + libname
-            else:
-                libpath = "build/src/" + libname
-            cmark = CDLL(libpath)
-            markdown = cmark.cmark_markdown_to_html
-            markdown.restype = c_char_p
-            markdown.argtypes = [c_char_p, c_long]
-            self.to_html = lambda x: use_library(markdown, x)
diff --git a/test/normalize.py b/test/normalize.py
deleted file mode 100644
index 03d958e..0000000
--- a/test/normalize.py
+++ /dev/null
@@ -1,179 +0,0 @@
-# -*- coding: utf-8 -*-
-from html.parser import HTMLParser, HTMLParseError
-from html.entities import name2codepoint
-import sys
-import re
-import cgi
-
-# Normalization code, adapted from
-# https://github.com/karlcow/markdown-testsuite/
-significant_attrs = ["alt", "href", "src", "title"]
-whitespace_re = re.compile('\s+')
-class MyHTMLParser(HTMLParser):
-    def __init__(self):
-        HTMLParser.__init__(self)
-        self.last = "starttag"
-        self.in_pre = False
-        self.output = ""
-        self.last_tag = ""
-    def handle_data(self, data):
-        after_tag = self.last == "endtag" or self.last == "starttag"
-        after_block_tag = after_tag and self.is_block_tag(self.last_tag)
-        if after_tag and self.last_tag == "br":
-            data = data.lstrip('\n')
-        data = whitespace_re.sub(' ', data)
-        if after_block_tag and not self.in_pre:
-            if self.last == "starttag":
-                data = data.lstrip()
-            elif self.last == "endtag":
-                data = data.strip()
-        self.output += data
-        self.last = "data"
-    def handle_endtag(self, tag):
-        if tag == "pre":
-            self.in_pre = False
-        elif self.is_block_tag(tag):
-            self.output = self.output.rstrip()
-        self.output += "</" + tag + ">"
-        self.last_tag = tag
-        self.last = "endtag"
-    def handle_starttag(self, tag, attrs):
-        if tag == "pre":
-            self.in_pre = True
-        if self.is_block_tag(tag):
-            self.output = self.output.rstrip()
-        self.output += "<" + tag
-        # For now we don't strip out 'extra' attributes, because of
-        # raw HTML test cases.
-        # attrs = filter(lambda attr: attr[0] in significant_attrs, attrs)
-        if attrs:
-            attrs.sort()
-            for (k,v) in attrs:
-                self.output += " " + k
-                if v != None:
-                    self.output += ("=" + '"' + cgi.escape(v,quote=True) + '"')
-        self.output += ">"
-        self.last_tag = tag
-        self.last = "starttag"
-    def handle_startendtag(self, tag, attrs):
-        """Ignore closing tag for self-closing """
-        self.handle_starttag(tag, attrs)
-        self.last_tag = tag
-        self.last = "endtag"
-    def handle_comment(self, data):
-        self.output += '<!--' + data + '-->'
-        self.last = "comment"
-    def handle_decl(self, data):
-        self.output += '<!' + data + '>'
-        self.last = "decl"
-    def unknown_decl(self, data):
-        self.output += '<!' + data + '>'
-        self.last = "decl"
-    def handle_pi(self,data):
-        self.output += '<?' + data + '>'
-        self.last = "pi"
-    def handle_entityref(self, name):
-        try:
-            c = chr(name2codepoint[name])
-        except KeyError:
-            c = None
-        self.output_char(c, '&' + name + ';')
-        self.last = "ref"
-    def handle_charref(self, name):
-        try:
-            if name.startswith("x"):
-                c = chr(int(name[1:], 16))
-            else:
-                c = chr(int(name))
-        except ValueError:
-            c = None
-        self.output_char(c, '&' + name + ';')
-        self.last = "ref"
-    # Helpers.
-    def output_char(self, c, fallback):
-        if c == '<':
-            self.output += "&lt;"
-        elif c == '>':
-            self.output += "&gt;"
-        elif c == '&':
-            self.output += "&amp;"
-        elif c == '"':
-            self.output += "&quot;"
-        elif c == None:
-            self.output += fallback
-        else:
-            self.output += c
-
-    def is_block_tag(self,tag):
-        return (tag in ['article', 'header', 'aside', 'hgroup', 'blockquote',
-            'hr', 'iframe', 'body', 'li', 'map', 'button', 'object', 'canvas',
-            'ol', 'caption', 'output', 'col', 'p', 'colgroup', 'pre', 'dd',
-            'progress', 'div', 'section', 'dl', 'table', 'td', 'dt',
-            'tbody', 'embed', 'textarea', 'fieldset', 'tfoot', 'figcaption',
-            'th', 'figure', 'thead', 'footer', 'tr', 'form', 'ul',
-            'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'video', 'script', 'style'])
-
-def normalize_html(html):
-    r"""
-    Return normalized form of HTML which ignores insignificant output
-    differences:
-
-    Multiple inner whitespaces are collapsed to a single space (except
-    in pre tags):
-
-        >>> normalize_html("<p>a \t b</p>")
-        '<p>a b</p>'
-
-        >>> normalize_html("<p>a \t\nb</p>")
-        '<p>a b</p>'
-
-    * Whitespace surrounding block-level tags is removed.
-
-        >>> normalize_html("<p>a b</p>")
-        '<p>a b</p>'
-
-        >>> normalize_html(" <p>a b</p>")
-        '<p>a b</p>'
-
-        >>> normalize_html("<p>a b</p> ")
-        '<p>a b</p>'
-
-        >>> normalize_html("\n\t<p>\n\t\ta b\t\t</p>\n\t")
-        '<p>a b</p>'
-
-        >>> normalize_html("<i>a b</i> ")
-        '<i>a b</i> '
-
-    * Self-closing tags are converted to open tags.
-
-        >>> normalize_html("<br />")
-        '<br>'
-
-    * Attributes are sorted and lowercased.
-
-        >>> normalize_html('<a title="bar" HREF="foo">x</a>')
-        '<a href="foo" title="bar">x</a>'
-
-    * References are converted to unicode, except that '<', '>', '&', and
-      '"' are rendered using entities.
-
-        >>> normalize_html("&forall;&amp;&gt;&lt;&quot;")
-        '\u2200&amp;&gt;&lt;&quot;'
-
-    """
-    html_chunk_re = re.compile("(\<!\[CDATA\[.*?\]\]\>|\<[^>]*\>|[^<]+)")
-    try:
-        parser = MyHTMLParser()
-        # We work around HTMLParser's limitations parsing CDATA
-        # by breaking the input into chunks and passing CDATA chunks
-        # through verbatim.
-        for chunk in re.finditer(html_chunk_re, html):
-            if chunk.group(0)[:8] == "<![CDATA":
-                parser.output += chunk.group(0)
-            else:
-                parser.feed(chunk.group(0))
-        parser.close()
-        return parser.output
-    except HTMLParseError as e:
-        sys.stderr.write("Normalization error: " + e.msg + "\n")
-        return html # on error, return unnormalized HTML
diff --git a/test/pathological_tests.py b/test/pathological_tests.py
deleted file mode 100644
index 0e991f9..0000000
--- a/test/pathological_tests.py
+++ /dev/null
@@ -1,63 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-import re
-import argparse
-import sys
-import platform
-from cmark import CMark
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description='Run cmark tests.')
-    parser.add_argument('--program', dest='program', nargs='?', default=None,
-            help='program to test')
-    parser.add_argument('--library-dir', dest='library_dir', nargs='?',
-            default=None, help='directory containing dynamic library')
-    args = parser.parse_args(sys.argv[1:])
-
-cmark = CMark(prog=args.program, library_dir=args.library_dir)
-
-# list of pairs consisting of input and a regex that must match the output.
-pathological = {
-    # note - some pythons have limit of 65535 for {num-matches} in re.
-    "nested strong emph":
-                (("*a **a " * 65000) + "b" + (" a** a*" * 65000),
-                 re.compile("(<em>a <strong>a ){65000}b( a</strong> a</em>){65000}")),
-    "nested brackets":
-                 (("[" * 50000) + "a" + ("]" * 50000),
-                  re.compile("\[{50000}a\]{50000}")),
-    "nested block quotes":
-                 ((("> " * 50000) + "a"),
-                  re.compile("(<blockquote>\n){50000}")),
-    "U+0000 in input":
-                 ("abc\u0000de\u0000",
-                  re.compile("abc\ufffd?de\ufffd?"))
-    }
-
-whitespace_re = re.compile('/s+/')
-passed = 0
-errored = 0
-failed = 0
-
-print("Testing pathological cases:")
-for description in pathological:
-    print(description)
-    (inp, regex) = pathological[description]
-    [rc, actual, err] = cmark.to_html(inp)
-    if rc != 0:
-        errored += 1
-        print(description)
-        print("program returned error code %d" % rc)
-        print(err)
-    elif regex.search(actual):
-        passed += 1
-    else:
-        print(description, 'failed')
-        print(repr(actual))
-        failed += 1
-
-print("%d passed, %d failed, %d errored" % (passed, failed, errored))
-if (failed == 0 and errored == 0):
-    exit(0)
-else:
-    exit(1)
diff --git a/test/spec_tests.py b/test/spec_tests.py
deleted file mode 100755
index cc676be..0000000
--- a/test/spec_tests.py
+++ /dev/null
@@ -1,139 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-import sys
-from difflib import unified_diff
-import argparse
-import re
-import json
-from cmark import CMark
-from normalize import normalize_html
-
-if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description='Run cmark tests.')
-    parser.add_argument('-p', '--program', dest='program', nargs='?', default=None,
-            help='program to test')
-    parser.add_argument('-s', '--spec', dest='spec', nargs='?', default='spec.txt',
-            help='path to spec')
-    parser.add_argument('-P', '--pattern', dest='pattern', nargs='?',
-            default=None, help='limit to sections matching regex pattern')
-    parser.add_argument('--library-dir', dest='library_dir', nargs='?',
-            default=None, help='directory containing dynamic library')
-    parser.add_argument('--no-normalize', dest='normalize',
-            action='store_const', const=False, default=True,
-            help='do not normalize HTML')
-    parser.add_argument('-d', '--dump-tests', dest='dump_tests',
-            action='store_const', const=True, default=False,
-            help='dump tests in JSON format')
-    parser.add_argument('--debug-normalization', dest='debug_normalization',
-            action='store_const', const=True,
-            default=False, help='filter stdin through normalizer for testing')
-    parser.add_argument('-n', '--number', type=int, default=None,
-            help='only consider the test with the given number')
-    args = parser.parse_args(sys.argv[1:])
-
-def print_test_header(headertext, example_number, start_line, end_line):
-    print("Example %d (lines %d-%d) %s" % (example_number,start_line,end_line,headertext))
-
-def do_test(test, normalize, result_counts):
-    [retcode, actual_html, err] = cmark.to_html(test['markdown'])
-    if retcode == 0:
-        expected_html = test['html']
-        unicode_error = None
-        if normalize:
-            try:
-                passed = normalize_html(actual_html) == normalize_html(expected_html)
-            except UnicodeDecodeError as e:
-                unicode_error = e
-                passed = False
-        else:
-            passed = actual_html == expected_html
-        if passed:
-            result_counts['pass'] += 1
-        else:
-            print_test_header(test['section'], test['example'], test['start_line'], test['end_line'])
-            sys.stdout.write(test['markdown'])
-            if unicode_error:
-                print("Unicode error: " + str(unicode_error))
-                print("Expected: " + repr(expected_html))
-                print("Got: " + repr(actual_html))
-            else:
-                expected_html_lines = expected_html.splitlines(True)
-                actual_html_lines = actual_html.splitlines(True)
-                for diffline in unified_diff(expected_html_lines, actual_html_lines,
-                                "expected HTML", "actual HTML"):
-                    sys.stdout.write(diffline)
-            sys.stdout.write('\n')
-            result_counts['fail'] += 1
-    else:
-        print_test_header(test['section'], test['example'], test['start_line'], test['end_line'])
-        print("program returned error code %d" % retcode)
-        print(err)
-        result_counts['error'] += 1
-
-def get_tests(specfile):
-    line_number = 0
-    start_line = 0
-    end_line = 0
-    example_number = 0
-    markdown_lines = []
-    html_lines = []
-    state = 0  # 0 regular text, 1 markdown example, 2 html output
-    headertext = ''
-    tests = []
-
-    header_re = re.compile('#+ ')
-
-    with open(specfile, 'r', encoding='utf-8') as specf:
-        for line in specf:
-            line_number = line_number + 1
-            if state == 0 and re.match(header_re, line):
-                headertext = header_re.sub('', line).strip()
-            if line.strip() == ".":
-                state = (state + 1) % 3
-                if state == 0:
-                    example_number = example_number + 1
-                    end_line = line_number
-                    tests.append({
-                        "markdown":''.join(markdown_lines).replace('→',"\t"),
-                        "html":''.join(html_lines),
-                        "example": example_number,
-                        "start_line": start_line,
-                        "end_line": end_line,
-                        "section": headertext})
-                    start_line = 0
-                    markdown_lines = []
-                    html_lines = []
-            elif state == 1:
-                if start_line == 0:
-                    start_line = line_number - 1
-                markdown_lines.append(line)
-            elif state == 2:
-                html_lines.append(line)
-    return tests
-
-if __name__ == "__main__":
-    if args.debug_normalization:
-        print(normalize_html(sys.stdin.read()))
-        exit(0)
-
-    all_tests = get_tests(args.spec)
-    if args.pattern:
-        pattern_re = re.compile(args.pattern, re.IGNORECASE)
-    else:
-        pattern_re = re.compile('.')
-    tests = [ test for test in all_tests if re.search(pattern_re, test['section']) and (not args.number or test['example'] == args.number) ]
-    if args.dump_tests:
-        print(json.dumps(tests, ensure_ascii=False, indent=2))
-        exit(0)
-    else:
-        skipped = len(all_tests) - len(tests)
-        cmark = CMark(prog=args.program, library_dir=args.library_dir)
-        result_counts = {'pass': 0, 'fail': 0, 'error': 0, 'skip': skipped}
-        for test in tests:
-            do_test(test, args.normalize, result_counts)
-        print("{pass} passed, {fail} failed, {error} errored, {skip} skipped".format(**result_counts))
-        if result_counts['fail'] == 0 and result_counts['error'] == 0:
-            exit(0)
-        else:
-            exit(1)
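One further hedged illustration (not part of the commit): get_tests() in the deleted spec_tests.py walks spec.txt as a small state machine, treating a line consisting of a single "." as the delimiter between ordinary spec text, a markdown example, and its expected HTML (a literal "→" in an example stands for a tab). A hypothetical spec fragment and the record it would produce:

    # Hypothetical spec.txt fragment in the "."-delimited format read above:
    #
    #     ## Emphasis
    #     .
    #     *hi*
    #     .
    #     <p><em>hi</em></p>
    #     .
    #
    # get_tests() would yield one test dict for it, roughly:
    test = {
        "markdown": "*hi*\n",
        "html": "<p><em>hi</em></p>\n",
        "example": 1,
        "start_line": 2,
        "end_line": 6,
        "section": "Emphasis",
    }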