aboutsummaryrefslogtreecommitdiff
path: root/runtests.py
blob: d41aace99805c70c47747c3c013ed3ac53a229ee (plain)
  1. #!/usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. from ctypes import CDLL, c_char_p, c_long
  4. import sys
  5. import platform
  6. from difflib import unified_diff
  7. from subprocess import *
  8. import argparse
  9. from HTMLParser import HTMLParser
  10. from htmlentitydefs import name2codepoint
  11. import re
  12. import cgi
  13. if __name__ == "__main__":
  14. parser = argparse.ArgumentParser(description='Run cmark tests.')
  15. parser.add_argument('--program', dest='program', nargs='?', default=None,
  16. help='program to test')
  17. parser.add_argument('--spec', dest='spec', nargs='?', default='spec.txt',
  18. help='path to spec')
  19. parser.add_argument('--pattern', dest='pattern', nargs='?',
  20. default=None, help='limit to sections matching regex pattern')
  21. parser.add_argument('--library_dir', dest='library_dir', nargs='?',
  22. default=None, help='directory containing dynamic library')
  23. args = parser.parse_args(sys.argv[1:])
  24. if not args.program:
  25. sysname = platform.system()
  26. libname = "libcmark"
  27. if sysname == 'Darwin':
  28. libname += ".dylib"
  29. elif sysname == 'Windows':
  30. libname += ".dll"
  31. else:
  32. libname += ".so"
  33. if args and args.library_dir:
  34. libpath = args.library_dir + "/" + libname
  35. else:
  36. libpath = "build/src/" + libname
  37. cmark = CDLL(libpath)
  38. markdown = cmark.cmark_markdown_to_html
  39. markdown.restype = c_char_p
  40. markdown.argtypes = [c_char_p, c_long]
  41. def md2html(text, prog):
  42. if prog:
  43. p1 = Popen([prog], stdout=PIPE, stdin=PIPE, stderr=PIPE)
  44. [result, err] = p1.communicate(input=text)
  45. return [p1.returncode, result, err]
  46. else:
  47. return [0, markdown(text, len(text)), '']
  48. # Normalization code, adapted from
  49. # https://github.com/karlcow/markdown-testsuite/
  50. significant_attrs = ["alt", "href", "src", "title"]
  51. normalize_whitespace_re = re.compile('\s+')
  52. normalize_newline_re = re.compile('^\s*')
  53. class MyHTMLParser(HTMLParser):
  54. def __init__(self):
  55. HTMLParser.__init__(self)
  56. self.last = "starttag"
  57. self.in_pre = False
  58. self.output = u""
  59. self.last_tag = ""
  60. def handle_data(self, data):
  61. after_tag = self.last == "endtag" or self.last == "starttag"
  62. after_block_tag = after_tag and self.is_block_tag(self.last_tag)
  63. if after_block_tag and not self.in_pre:
  64. data = normalize_whitespace_re.sub(' ', data)
  65. if self.last == "starttag":
  66. data = data.lstrip()
  67. elif self.last == "endtag":
  68. data = data.strip()
  69. elif after_tag and self.last_tag == "br":
  70. data = normalize_newline_re.sub('\n', data)
  71. self.output += data
  72. self.last = "data"
  73. def handle_endtag(self, tag):
  74. if tag == "pre":
  75. self.in_pre = False
  76. if self.is_block_tag(tag):
  77. self.output = self.output.rstrip()
  78. self.output += "</" + tag + ">"
  79. self.last_tag = tag
  80. self.last = "endtag"
  81. def handle_starttag(self, tag, attrs):
  82. if tag == "pre":
  83. self.in_pre = True
  84. self.output += "<" + tag
  85. # For now we don't strip out 'extra' attributes, because of
  86. # raw HTML test cases.
  87. # attrs = filter(lambda attr: attr[0] in significant_attrs, attrs)
  88. if attrs:
  89. attrs.sort()
  90. for (k,v) in attrs:
  91. self.output += " " + k
  92. if v != None:
  93. self.output += ("=" + '"' + cgi.escape(v,quote=True) + '"')
  94. self.output += ">"
  95. self.last_tag = tag
  96. self.last = "starttag"
  97. def handle_startendtag(self, tag, attrs):
  98. """Ignore closing tag for self-closing """
  99. self.handle_starttag(tag, attrs)
  100. self.last_tag = tag
  101. self.last = "endtag"
  102. def handle_comment(self, data):
  103. self.output += '<!--' + data + '-->'
  104. self.last = "comment"
  105. def handle_decl(self, data):
  106. self.output += '<!' + data + '>'
  107. self.last = "decl"
  108. def handle_unknown_decl(self, data):
  109. self.output += '<!' + data + '>'
  110. self.last = "decl"
  111. def handle_pi(self,data):
  112. self.output += '<?' + data + '>'
  113. self.last = "pi"
  114. def handle_entityref(self, name):
  115. try:
  116. c = unichr(name2codepoint[name])
  117. except KeyError:
  118. c = None
  119. self.output_char(c, '&' + name + ';')
  120. self.last = "ref"
  121. def handle_charref(self, name):
  122. try:
  123. if name.startswith("x"):
  124. c = unichr(int(name[1:], 16))
  125. else:
  126. c = unichr(int(name))
  127. except ValueError:
  128. c = None
  129. self.output_char(c, '&' + name + ';')
  130. self.last = "ref"
  131. # Helpers.
  132. def output_char(self, c, fallback):
  133. if c == u'<':
  134. self.output += "&lt;"
  135. elif c == u'>':
  136. self.output += "&gt;"
  137. elif c == u'&':
  138. self.output += "&amp;"
  139. elif c == u'"':
  140. self.output += "&quot;"
  141. elif c == None:
  142. self.output += fallback
  143. else:
  144. self.output += c
  145. def is_block_tag(self,tag):
  146. return (tag in ['article', 'header', 'aside', 'hgroup', 'blockquote',
  147. 'hr', 'iframe', 'body', 'li', 'map', 'button', 'object', 'canvas',
  148. 'ol', 'caption', 'output', 'col', 'p', 'colgroup', 'pre', 'dd',
  149. 'progress', 'div', 'section', 'dl', 'table', 'td', 'dt',
  150. 'tbody', 'embed', 'textarea', 'fieldset', 'tfoot', 'figcaption',
  151. 'th', 'figure', 'thead', 'footer', 'tr', 'form', 'ul',
  152. 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'video', 'script', 'style'])
  153. def normalize(html):
  154. r"""
  155. Return normalized form of HTML which ignores insignificant output
  156. differences:
  157. * Multiple inner whitespaces are collapsed to a single space (except
  158. in pre tags).
  159. * Outer whitespace (outside block-level tags) is removed.
  160. * Self-closing tags are converted to open tags.
  161. * Attributes are sorted and lowercased.
  162. * References are converted to unicode, except that '<', '>', '&', and
  163. '&' are rendered using entities.
  164. Known limitations:
  165. * HTMLParser just swallows CDATA.
  166. * HTMLParser seems to treat unknown declarations as comments.
  167. """
  168. parser = MyHTMLParser()
  169. parser.feed(html.decode(encoding='UTF-8'))
  170. parser.close()
  171. return parser.output
  172. def print_test_header(headertext, example_number, start_line, end_line):
  173. print "Example %d (lines %d-%d) %s" % (example_number,start_line,end_line,headertext)
  174. def do_test(markdown_lines, expected_html_lines, headertext,
  175. example_number, start_line, end_line, prog=None):
  176. real_markdown_text = ''.join(markdown_lines).replace('→','\t')
  177. [retcode, actual_html, err] = md2html(real_markdown_text, prog)
  178. if retcode == 0:
  179. actual_html_lines = actual_html.splitlines(True)
  180. expected_html = ''.join(expected_html_lines)
  181. if normalize(actual_html) == normalize(expected_html):
  182. return 'pass'
  183. else:
  184. print_test_header(headertext, example_number,start_line,end_line)
  185. sys.stdout.write(real_markdown_text)
  186. for diffline in unified_diff(expected_html_lines, actual_html_lines,
  187. "expected HTML", "actual HTML"):
  188. sys.stdout.write(diffline)
  189. sys.stdout.write('\n')
  190. return 'fail'
  191. else:
  192. print_test_header(example_number,start_line,end_line)
  193. print "program returned error code %d" % retcode
  194. print(err)
  195. return 'error'
  196. def do_tests(specfile, prog, pattern):
  197. line_number = 0
  198. start_line = 0
  199. end_line = 0
  200. example_number = 0
  201. passed = 0
  202. failed = 0
  203. errored = 0
  204. markdown_lines = []
  205. html_lines = []
  206. active = True
  207. state = 0 # 0 regular text, 1 markdown example, 2 html output
  208. headertext = ''
  209. header_re = re.compile('#+ ')
  210. if pattern:
  211. pattern_re = re.compile(pattern, re.IGNORECASE)
  212. with open(specfile, 'r') as specf:
  213. for line in specf:
  214. line_number = line_number + 1
  215. if state == 0 and re.match(header_re, line):
  216. headertext = header_re.sub('', line).strip()
  217. if pattern:
  218. if re.search(pattern_re, line):
  219. active = True
  220. else:
  221. active = False
  222. if line.strip() == ".":
  223. state = (state + 1) % 3
  224. if state == 0:
  225. example_number = example_number + 1
  226. end_line = line_number
  227. if active:
  228. result = do_test(markdown_lines, html_lines,
  229. headertext, example_number,
  230. start_line, end_line, prog)
  231. if result == 'pass':
  232. passed = passed + 1
  233. elif result == 'fail':
  234. failed = failed + 1
  235. else:
  236. errored = errored + 1
  237. start_line = 0
  238. markdown_lines = []
  239. html_lines = []
  240. elif state == 1:
  241. if start_line == 0:
  242. start_line = line_number
  243. markdown_lines.append(line)
  244. elif state == 2:
  245. html_lines.append(line)
  246. print "%d passed, %d failed, %d errored" % (passed, failed, errored)
  247. return (failed == 0 and errored == 0)
  248. if __name__ == "__main__":
  249. if do_tests(args.spec, args.program, args.pattern):
  250. exit(0)
  251. else:
  252. exit(1)