about summary refs log tree commit diff
path: root/commonmark.rb
blob: 0140c1b39552c067e1d3635d98a32170f2c89cc2 (plain)
  1. #!/usr/bin/env ruby
  2. require 'ffi'
  3. require 'stringio'
  4. require 'cgi'
  5. require 'set'
  6. require 'uri'
  # Low-level FFI declarations for the cmark shared library.
  #
  # Every function is attached under its C name, so callers use e.g.
  # CMark::cmark_node_get_type(pointer).
  module CMark
    extend FFI::Library

    ffi_lib %w[libcmark cmark]

    # Nodes are exchanged with the library as opaque pointers.
    typedef :pointer, :node

    # NOTE(review): the symbol order presumably has to mirror the C enum
    # declarations in the linked cmark version -- do not reorder.
    enum :node_type, %i[
      document blockquote list list_item
      fenced_code indented_code html paragraph
      atx_header setext_header hrule reference_def
      str softbreak linebreak code inline_html
      emph strong link image
    ]
    enum :list_type, %i[no_list bullet_list ordered_list]

    # [function name, argument types, return type], attached in this order.
    [
      [:cmark_free_nodes,               [:node],          :void],
      [:cmark_node_unlink,              [:node],          :void],
      [:cmark_markdown_to_html,         [:string, :int],  :string],
      [:cmark_parse_document,           [:string, :int],  :node],
      [:cmark_node_first_child,         [:node],          :node],
      [:cmark_node_parent,              [:node],          :node],
      [:cmark_node_next,                [:node],          :node],
      [:cmark_node_previous,            [:node],          :node],
      [:cmark_node_get_type,            [:node],          :node_type],
      [:cmark_node_get_string_content,  [:node],          :string],
      [:cmark_node_get_url,             [:node],          :string],
      [:cmark_node_get_title,           [:node],          :string],
      [:cmark_node_get_header_level,    [:node],          :int],
      [:cmark_node_get_list_type,       [:node],          :list_type],
      [:cmark_node_get_list_start,      [:node],          :int],
      [:cmark_node_get_list_tight,      [:node],          :bool]
    ].each do |c_name, arg_types, return_type|
      attach_function c_name, arg_types, return_type
    end
  end
  # Ruby-side copy of one node of a parsed document.
  #
  # The constructor reads the node's type, string content and type-specific
  # attributes through the CMark bindings and recursively wraps every child.
  # When the node is the :document root, the underlying C tree is released
  # once the copy is complete.
  class Node
    attr_accessor :type, :children, :parent, :string_content, :header_level,
                  :list_type, :list_start, :list_tight, :url, :title

    # @param pointer [#null?] handle accepted by the CMark node functions
    def initialize(pointer)
      # Set the defaults before the null check: an early return from
      # initialize cannot make Node.new return nil, so a null pointer must
      # still produce a usable, empty node (type nil, no children).
      @children = []
      @parent = nil
      return if pointer.null?

      @pointer = pointer
      @type = CMark::cmark_node_get_type(pointer)
      load_children(pointer)
      @string_content = CMark::cmark_node_get_string_content(pointer)
      load_type_specific_fields(pointer)
      free if @type == :document
    end

    # Visits this node and then all of its descendants, parents before
    # children.
    #
    # @yieldparam node [Node] the node being visited
    # @return [Enumerator] when called without a block
    def walk(&blk)
      return enum_for(:walk) unless blk

      blk.call(self)
      children.each { |child| child.walk(&blk) }
    end

    # Walk the tree and transform it. blk takes one argument, a node. If its
    # value is a node, that node replaces the node being visited. If its
    # value is an array of nodes, those nodes are spliced in place of the
    # node being visited (so, to delete a node, return an empty array).
    # Otherwise the node is left as it is. A node without a parent (the
    # document root) is never replaced.
    #
    # Nodes that are spliced in are re-parented but not passed to blk again;
    # the walk continues through the children of the node that was visited.
    def transform(&blk)
      result = blk.call(self)
      replacement =
        case result
        when Array then result
        when Node then [result]
        end
      replace_self_with(replacement) if replacement

      # Iterate over a snapshot: splicing edits the live children array, and
      # walking that array directly would skip the sibling after a deleted
      # node and re-visit nodes that were just spliced in.
      children.dup.each { |child| child.transform(&blk) }
      children
    end

    # Parses the markdown text in s and returns the document root.
    def self.parse_string(s)
      new(CMark::cmark_parse_document(s, s.bytesize))
    end

    # Reads all of f (anything responding to #read) and parses it.
    def self.parse_file(f)
      parse_string(f.read)
    end

    protected

    # Wraps each child of pointer in a Node, in sibling order.
    def load_children(pointer)
      child_pointer = CMark::cmark_node_first_child(pointer)
      until child_pointer.null?
        child = Node.new(child_pointer)
        child.parent = self
        @children << child
        child_pointer = CMark::cmark_node_next(child_pointer)
      end
    end

    # Reads the attributes that are only fetched for particular node types.
    def load_type_specific_fields(pointer)
      case @type
      when :atx_header, :setext_header
        @header_level = CMark::cmark_node_get_header_level(pointer)
      when :list
        @list_type = CMark::cmark_node_get_list_type(pointer)
        @list_start = CMark::cmark_node_get_list_start(pointer)
        @list_tight = CMark::cmark_node_get_list_tight(pointer)
      when :link, :image
        # The getters may hand back nil; normalise to an empty string.
        @url = CMark::cmark_node_get_url(pointer) || ""
        @title = CMark::cmark_node_get_title(pointer) || ""
      end
    end

    # Substitutes replacement (an array of nodes) for this node in its
    # parent's children and points the new nodes at that parent, so later
    # replacements of those nodes land in the right array.
    def replace_self_with(replacement)
      return unless parent # at the document root, just skip

      siblings = parent.children
      index = siblings.index(self)
      return if index.nil?

      replacement.each { |node| node.parent = parent if node.kind_of?(Node) }
      siblings[index, 1] = replacement
    end

    # Releases the C tree rooted at this node. The pointer is dropped
    # afterwards so it cannot be freed a second time.
    def free
      return if @pointer.nil?

      CMark::cmark_free_nodes(@pointer)
      @pointer = nil
    end
  end
  # Base class for renderers that turn a Node tree into text.
  #
  # Subclasses define one public method per node type (e.g. #paragraph,
  # #str), each taking the node; #render dispatches on node.type. Output
  # goes to the stream given to the constructor, or to an internal StringIO
  # whose contents #render returns for a :document node.
  class Renderer
    attr_accessor :in_tight, :warnings, :in_plain

    # @param stream [#write, #printf, nil] output sink; when omitted the
    #   output is collected in memory
    def initialize(stream = nil)
      @stringwriter = !stream
      @stream = stream || StringIO.new
      @need_blocksep = false
      @warnings = Set.new []
      @in_tight = false
      @in_plain = false
    end

    # Writes printf-style formatted text to the stream.
    def outf(format, *args)
      @stream.printf(format, *args)
    end

    # Emits each argument: nodes are rendered, arrays are emitted element by
    # element, and anything else is written to the stream as is.
    def out(*args)
      args.each do |arg|
        case arg
        when Node then render(arg)
        when Array then arg.each { |item| out(item) }
        else @stream.write(arg)
        end
      end
    end

    # Renders node through the handler named after its type.
    #
    # @return [String, nil] the collected output when node is a :document
    #   and no stream was given to the constructor
    # @raise [NoMethodError] when no handler exists for node.type; a warning
    #   naming the type is recorded in #warnings first
    def render(node)
      @node = node
      if node.type == :document
        document(node)
        out("\n")
        return @stream.string if @stringwriter
      elsif in_plain && node.type != :str && node.type != :softbreak
        # pass through looking for str, softbreak
        node.children.each { |child| render(child) }
      elsif respond_to?(node.type, true)
        # Checking up front instead of rescuing NoMethodError keeps errors
        # raised inside a handler from being reported as "not implemented"
        # for this node and for every ancestor the exception unwinds through.
        send(node.type, node)
      else
        @warnings.add("WARNING: " + node.type.to_s + " not implemented.")
        raise NoMethodError,
              "undefined method `#{node.type}' for #{self.class}"
      end
    end

    def document(node)
      out(node.children)
    end

    # Both code block flavours share the subclass's #code_block handler.
    def indented_code(node)
      code_block(node)
    end

    def fenced_code(node)
      code_block(node)
    end

    # Both header flavours share the subclass's #header handler.
    def setext_header(node)
      header(node)
    end

    def atx_header(node)
      header(node)
    end

    # Reference definitions produce no output.
    def reference_def(node)
    end

    # Separator written between consecutive blocks.
    def blocksep
      out("\n")
    end

    # Separator written just inside a container's delimiters; omitted while
    # in_tight is set.
    def containersep
      out("\n") unless in_tight
    end

    # Runs blk as one block-level element, writing a block separator first
    # if a previous block asked for one.
    def block(&blk)
      blocksep if @need_blocksep
      blk.call
      @need_blocksep = true
    end

    # Wraps the output of blk between starter and ender. The first block
    # inside the container is not preceded by a block separator.
    def container(starter, ender, &blk)
      out(starter)
      containersep
      @need_blocksep = false
      blk.call
      containersep
      out(ender)
    end

    # Runs blk with in_plain set, restoring the previous value afterwards
    # even if blk raises.
    def plain(&blk)
      old_in_plain = @in_plain
      @in_plain = true
      blk.call
    ensure
      @in_plain = old_in_plain
    end
  end
  # Renders a Node tree as HTML.
  class HtmlRenderer < Renderer
    # URI.escape was removed in Ruby 3.0; the RFC 2396 parser still provides
    # the same percent-escaping.
    URL_ESCAPER = URI::RFC2396_Parser.new

    def header(node)
      block do
        out("<h", node.header_level, ">", node.children,
            "</h", node.header_level, ">")
      end
    end

    # Inside a tight list the paragraph's content is written without the
    # surrounding <p> element.
    def paragraph(node)
      block do
        if in_tight
          out(node.children)
        else
          out("<p>", node.children, "</p>")
        end
      end
    end

    # Renders a bullet list as <ul> and any other list as <ol>, adding a
    # start attribute when the list does not begin at 1. in_tight follows
    # the list for the duration of the call and is restored afterwards,
    # even if rendering raises.
    def list(node)
      old_in_tight = in_tight
      self.in_tight = node.list_tight
      block do
        if node.list_type == :bullet_list
          container("<ul>", "</ul>") { out(node.children) }
        else
          start = node.list_start == 1 ? '' :
                  (' start="' + node.list_start.to_s + '"')
          # The opening tag has to be spelled out here; passing only the
          # attribute text would leave the list without its <ol>.
          container("<ol" + start + ">", "</ol>") { out(node.children) }
        end
      end
    ensure
      self.in_tight = old_in_tight
    end

    def list_item(node)
      block do
        container("<li>", "</li>") { out(node.children) }
      end
    end

    def blockquote(node)
      block do
        container("<blockquote>", "</blockquote>") { out(node.children) }
      end
    end

    def hrule(node)
      block { out("<hr />") }
    end

    def code_block(node)
      block do
        out("<pre><code>")
        out(escape_text(node.string_content))
        out("</code></pre>")
      end
    end

    # Raw HTML blocks are written through unescaped.
    def html(node)
      block { out(node.string_content) }
    end

    # Raw inline HTML is written through unescaped.
    def inline_html(node)
      out(node.string_content)
    end

    def emph(node)
      out("<em>", node.children, "</em>")
    end

    def strong(node)
      out("<strong>", node.children, "</strong>")
    end

    def link(node)
      out('<a href="', escape_url(node.url), '"')
      out_title(node)
      out('>', node.children, '</a>')
    end

    # The alt text is produced in plain mode, so only the text of the
    # image's children is written.
    def image(node)
      out('<img src="', escape_url(node.url), '"')
      out_title(node)
      plain do
        out(' alt="', node.children, '" />')
      end
    end

    def str(node)
      out(escape_text(node.string_content))
    end

    def code(node)
      out("<code>")
      out(escape_text(node.string_content))
      out("</code>")
    end

    def linebreak(node)
      out("<br/>")
      softbreak(node)
    end

    def softbreak(node)
      out("\n")
    end

    private

    # HTML-escapes text content; nil is treated as an empty string.
    def escape_text(text)
      CGI.escapeHTML(text.to_s)
    end

    # Percent-escapes url and then HTML-escapes the result, so characters
    # such as & are safe inside a double-quoted attribute.
    def escape_url(url)
      CGI.escapeHTML(URL_ESCAPER.escape(url.to_s))
    end

    # Writes the title attribute when the node has a non-empty title.
    def out_title(node)
      return unless node.title && node.title.length > 0

      out(' title="', CGI.escapeHTML(node.title), '"')
    end
  end
  # Demo driver: parses the markdown read from ARGF, prints link URLs,
  # upcases header text, turns links into plain text and writes the
  # resulting HTML to STDOUT. Guarded so that requiring this file for its
  # classes does not start reading input.
  if __FILE__ == $PROGRAM_NAME
    doc = Node.parse_file(ARGF)

    # Walk tree and print URLs for links
    doc.walk do |node|
      printf("URL = %s\n", node.url) if node.type == :link
    end

    # Capitalize strings in headers
    doc.walk do |node|
      next unless node.type == :setext_header || node.type == :atx_header

      node.walk do |subnode|
        if subnode.type == :str
          subnode.string_content = subnode.string_content.upcase
        end
      end
    end

    # Walk tree and transform links to regular text
    doc.transform do |node|
      node.children if node.type == :link
    end

    renderer = HtmlRenderer.new(STDOUT)
    renderer.render(doc)

    # Report anything the renderer could not handle.
    renderer.warnings.each do |w|
      STDERR.write("#{w}\n")
    end
  end
  354. # def markdown_to_html(s)
  355. # len = s.bytes.length
  356. # CMark::cmark_markdown_to_html(s, len)
  357. # end
  358. # print markdown_to_html(STDIN.read())