aboutsummaryrefslogtreecommitdiff
path: root/runtests.py
blob: 2cc00d16da0afc47f5391349b23e6f33ff97bc2d (plain)
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. from ctypes import CDLL, c_char_p, c_long
  4. import sys
  5. import platform
  6. from difflib import unified_diff
  7. from subprocess import *
  8. import argparse
  9. from HTMLParser import HTMLParser
  10. from htmlentitydefs import name2codepoint
  11. import re
  12. import cgi
  13. if __name__ == "__main__":
  14. parser = argparse.ArgumentParser(description='Run cmark tests.')
  15. parser.add_argument('--program', dest='program', nargs='?', default=None,
  16. help='program to test')
  17. parser.add_argument('--spec', dest='spec', nargs='?', default='spec.txt',
  18. help='path to spec')
  19. parser.add_argument('--pattern', dest='pattern', nargs='?',
  20. default=None, help='limit to sections matching regex pattern')
  21. parser.add_argument('--library-dir', dest='library_dir', nargs='?',
  22. default=None, help='directory containing dynamic library')
  23. parser.add_argument('--test-normalization', dest='test_normalization',
  24. action='store_const', const=True,
  25. default=False, help='filter stdin through normalizer for testing')
  26. args = parser.parse_args(sys.argv[1:])
  27. if not args.program:
  28. sysname = platform.system()
  29. libname = "libcmark"
  30. if sysname == 'Darwin':
  31. libname += ".dylib"
  32. elif sysname == 'Windows':
  33. libname += ".dll"
  34. else:
  35. libname += ".so"
  36. if args and args.library_dir:
  37. libpath = args.library_dir + "/" + libname
  38. else:
  39. libpath = "build/src/" + libname
  40. cmark = CDLL(libpath)
  41. markdown = cmark.cmark_markdown_to_html
  42. markdown.restype = c_char_p
  43. markdown.argtypes = [c_char_p, c_long]
  44. def md2html(text, prog):
  45. if prog:
  46. p1 = Popen([prog], stdout=PIPE, stdin=PIPE, stderr=PIPE)
  47. [result, err] = p1.communicate(input=text)
  48. return [p1.returncode, result, err]
  49. else:
  50. return [0, markdown(text, len(text)), '']
  51. # Normalization code, adapted from
  52. # https://github.com/karlcow/markdown-testsuite/
  53. significant_attrs = ["alt", "href", "src", "title"]
  54. normalize_whitespace_re = re.compile('\s+')
  55. class MyHTMLParser(HTMLParser):
  56. def __init__(self):
  57. HTMLParser.__init__(self)
  58. self.last = "starttag"
  59. self.in_pre = False
  60. self.output = u""
  61. self.last_tag = ""
  62. def handle_data(self, data):
  63. after_tag = self.last == "endtag" or self.last == "starttag"
  64. after_block_tag = after_tag and self.is_block_tag(self.last_tag)
  65. if after_tag and self.last_tag == "br":
  66. data = data.lstrip('\n')
  67. data = normalize_whitespace_re.sub(' ', data)
  68. if after_block_tag and not self.in_pre:
  69. if self.last == "starttag":
  70. data = data.lstrip()
  71. elif self.last == "endtag":
  72. data = data.strip()
  73. self.output += data
  74. self.last = "data"
  75. def handle_endtag(self, tag):
  76. if tag == "pre":
  77. self.in_pre = False
  78. if self.is_block_tag(tag):
  79. self.output = self.output.rstrip()
  80. self.output += "</" + tag + ">"
  81. self.last_tag = tag
  82. self.last = "endtag"
  83. def handle_starttag(self, tag, attrs):
  84. if tag == "pre":
  85. self.in_pre = True
  86. self.output += "<" + tag
  87. # For now we don't strip out 'extra' attributes, because of
  88. # raw HTML test cases.
  89. # attrs = filter(lambda attr: attr[0] in significant_attrs, attrs)
  90. if attrs:
  91. attrs.sort()
  92. for (k,v) in attrs:
  93. self.output += " " + k
  94. if v != None:
  95. self.output += ("=" + '"' + cgi.escape(v,quote=True) + '"')
  96. self.output += ">"
  97. self.last_tag = tag
  98. self.last = "starttag"
  99. def handle_startendtag(self, tag, attrs):
  100. """Ignore closing tag for self-closing """
  101. self.handle_starttag(tag, attrs)
  102. self.last_tag = tag
  103. self.last = "endtag"
  104. def handle_comment(self, data):
  105. self.output += '<!--' + data + '-->'
  106. self.last = "comment"
  107. def handle_decl(self, data):
  108. self.output += '<!' + data + '>'
  109. self.last = "decl"
  110. def handle_unknown_decl(self, data):
  111. self.output += '<!' + data + '>'
  112. self.last = "decl"
  113. def handle_pi(self,data):
  114. self.output += '<?' + data + '>'
  115. self.last = "pi"
  116. def handle_entityref(self, name):
  117. try:
  118. c = unichr(name2codepoint[name])
  119. except KeyError:
  120. c = None
  121. self.output_char(c, '&' + name + ';')
  122. self.last = "ref"
  123. def handle_charref(self, name):
  124. try:
  125. if name.startswith("x"):
  126. c = unichr(int(name[1:], 16))
  127. else:
  128. c = unichr(int(name))
  129. except ValueError:
  130. c = None
  131. self.output_char(c, '&' + name + ';')
  132. self.last = "ref"
  133. # Helpers.
  134. def output_char(self, c, fallback):
  135. if c == u'<':
  136. self.output += "&lt;"
  137. elif c == u'>':
  138. self.output += "&gt;"
  139. elif c == u'&':
  140. self.output += "&amp;"
  141. elif c == u'"':
  142. self.output += "&quot;"
  143. elif c == None:
  144. self.output += fallback
  145. else:
  146. self.output += c
  147. def is_block_tag(self,tag):
  148. return (tag in ['article', 'header', 'aside', 'hgroup', 'blockquote',
  149. 'hr', 'iframe', 'body', 'li', 'map', 'button', 'object', 'canvas',
  150. 'ol', 'caption', 'output', 'col', 'p', 'colgroup', 'pre', 'dd',
  151. 'progress', 'div', 'section', 'dl', 'table', 'td', 'dt',
  152. 'tbody', 'embed', 'textarea', 'fieldset', 'tfoot', 'figcaption',
  153. 'th', 'figure', 'thead', 'footer', 'tr', 'form', 'ul',
  154. 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'video', 'script', 'style'])
  155. def normalize(html):
  156. r"""
  157. Return normalized form of HTML which ignores insignificant output
  158. differences:
  159. * Multiple inner whitespaces are collapsed to a single space (except
  160. in pre tags).
  161. * Outer whitespace (outside block-level tags) is removed.
  162. * Self-closing tags are converted to open tags.
  163. * Attributes are sorted and lowercased.
  164. * References are converted to unicode, except that '<', '>', '&', and
  165. '&' are rendered using entities.
  166. Known limitations:
  167. * HTMLParser just swallows CDATA.
  168. * HTMLParser seems to treat unknown declarations as comments.
  169. """
  170. parser = MyHTMLParser()
  171. parser.feed(html.decode(encoding='UTF-8'))
  172. parser.close()
  173. return parser.output
  174. def print_test_header(headertext, example_number, start_line, end_line):
  175. print "Example %d (lines %d-%d) %s" % (example_number,start_line,end_line,headertext)
  176. def do_test(markdown_lines, expected_html_lines, headertext,
  177. example_number, start_line, end_line, prog=None):
  178. real_markdown_text = ''.join(markdown_lines).replace('→','\t')
  179. [retcode, actual_html, err] = md2html(real_markdown_text, prog)
  180. if retcode == 0:
  181. actual_html_lines = actual_html.splitlines(True)
  182. expected_html = ''.join(expected_html_lines)
  183. if normalize(actual_html) == normalize(expected_html):
  184. return 'pass'
  185. else:
  186. print_test_header(headertext, example_number,start_line,end_line)
  187. sys.stdout.write(real_markdown_text)
  188. for diffline in unified_diff(expected_html_lines, actual_html_lines,
  189. "expected HTML", "actual HTML"):
  190. sys.stdout.write(diffline)
  191. sys.stdout.write('\n')
  192. return 'fail'
  193. else:
  194. print_test_header(example_number,start_line,end_line)
  195. print "program returned error code %d" % retcode
  196. print(err)
  197. return 'error'
  198. def do_tests(specfile, prog, pattern):
  199. line_number = 0
  200. start_line = 0
  201. end_line = 0
  202. example_number = 0
  203. passed = 0
  204. failed = 0
  205. errored = 0
  206. markdown_lines = []
  207. html_lines = []
  208. active = True
  209. state = 0 # 0 regular text, 1 markdown example, 2 html output
  210. headertext = ''
  211. header_re = re.compile('#+ ')
  212. if pattern:
  213. pattern_re = re.compile(pattern, re.IGNORECASE)
  214. with open(specfile, 'r') as specf:
  215. for line in specf:
  216. line_number = line_number + 1
  217. if state == 0 and re.match(header_re, line):
  218. headertext = header_re.sub('', line).strip()
  219. if pattern:
  220. if re.search(pattern_re, line):
  221. active = True
  222. else:
  223. active = False
  224. if line.strip() == ".":
  225. state = (state + 1) % 3
  226. if state == 0:
  227. example_number = example_number + 1
  228. end_line = line_number
  229. if active:
  230. result = do_test(markdown_lines, html_lines,
  231. headertext, example_number,
  232. start_line, end_line, prog)
  233. if result == 'pass':
  234. passed = passed + 1
  235. elif result == 'fail':
  236. failed = failed + 1
  237. else:
  238. errored = errored + 1
  239. start_line = 0
  240. markdown_lines = []
  241. html_lines = []
  242. elif state == 1:
  243. if start_line == 0:
  244. start_line = line_number
  245. markdown_lines.append(line)
  246. elif state == 2:
  247. html_lines.append(line)
  248. print "%d passed, %d failed, %d errored" % (passed, failed, errored)
  249. return (failed == 0 and errored == 0)
  250. if __name__ == "__main__":
  251. if args.test_normalization:
  252. print normalize(sys.stdin.read())
  253. elif do_tests(args.spec, args.program, args.pattern):
  254. exit(0)
  255. else:
  256. exit(1)