aboutsummaryrefslogtreecommitdiff
path: root/commonmark.rb
blob: dead84c3a6f76772176192ddb44c3c6b4d138bf2 (plain)
  1. #!/usr/bin/env ruby
  2. require 'ffi'
  3. require 'stringio'
  4. require 'cgi'
  5. require 'set'
  6. require 'uri'
  # FFI declarations for the cmark shared library. The library is looked up
  # under the names "libcmark" and "cmark"; every function listed in the table
  # below is attached as a module-level method of the same name.
  module CMark
    extend FFI::Library
    ffi_lib %w[libcmark cmark]

    # Nodes are passed around as raw pointers.
    typedef :pointer, :node

    # NOTE(review): the symbol order here presumably has to mirror the C enum
    # declarations of the linked cmark version - confirm against its header.
    enum :node_type, %i[
      document blockquote list list_item
      code_block html paragraph
      header hrule reference_def
      str softbreak linebreak code inline_html
      emph strong link image
    ]
    enum :list_type, %i[no_list bullet_list ordered_list]

    # C function name => [argument types, return type]
    {
      cmark_free_nodes: [[:node], :void],
      cmark_node_unlink: [[:node], :void],
      cmark_markdown_to_html: [[:string, :int], :string],
      cmark_parse_document: [[:string, :int], :node],
      cmark_node_first_child: [[:node], :node],
      cmark_node_parent: [[:node], :node],
      cmark_node_next: [[:node], :node],
      cmark_node_previous: [[:node], :node],
      cmark_node_get_type: [[:node], :node_type],
      cmark_node_get_string_content: [[:node], :string],
      cmark_node_get_url: [[:node], :string],
      cmark_node_get_title: [[:node], :string],
      cmark_node_get_header_level: [[:node], :int],
      cmark_node_get_list_type: [[:node], :list_type],
      cmark_node_get_list_start: [[:node], :int],
      cmark_node_get_list_tight: [[:node], :bool]
    }.each do |c_name, (arg_types, return_type)|
      attach_function c_name, arg_types, return_type
    end
  end
  # Ruby-side copy of a cmark syntax-tree node.
  #
  # The constructor eagerly copies the node type, all children (recursively)
  # and the type-specific attributes out of the C structure behind the given
  # pointer. When the node is a :document, the C tree is freed once copying is
  # done, so the Ruby tree is self-contained afterwards.
  class Node
    attr_accessor :type, :children, :parent, :string_content, :header_level,
                  :list_type, :list_start, :list_tight, :url, :title

    # Build a node (and its whole subtree) from a cmark node pointer.
    #
    # pointer - object responding to +null?+ that is handed to the CMark
    #           accessor functions. A null pointer yields an empty node with
    #           no type and no children instead of a half-initialised object.
    def initialize(pointer)
      # Set the tree links first so even a null-pointer node can be walked.
      @children = []
      @parent = nil
      return if pointer.null?

      @pointer = pointer
      @type = CMark.cmark_node_get_type(pointer)
      load_children(pointer)
      load_attributes(pointer)
      # Everything needed has been copied into Ruby objects by now.
      free if @type == :document
    end

    # An iterator that "walks the tree," yielding this node and then every
    # descendant, parents before children.
    #
    # Children are iterated over a snapshot, so the block may splice nodes in
    # or out of a children array (as #transform does) without siblings being
    # skipped or visited twice.
    #
    # Returns this node's children array.
    def walk(&blk)
      yield self
      children.dup.each { |child| child.walk(&blk) }
      children
    end

    # Walk the tree and transform it. blk should take one argument,
    # a node. If its value is a node, that node replaces the node being
    # visited. If its value is an array of nodes, those nodes are spliced
    # in place of the node being visited (so, to delete a node, use an
    # empty array). Otherwise the node is left as it is.
    #
    # The node the walk started from is never replaced, because it has no
    # parent to splice into. Replacement nodes are re-parented to the node
    # they are spliced under.
    def transform(&blk)
      walk do |node|
        result = blk.call(node)
        replacement =
          case result
          when Array then result
          when Node then [result]
          end
        next unless replacement

        parent = node.parent
        next unless parent # at the document root, just skip

        siblings = parent.children
        index = siblings.index(node)
        next unless index # node is no longer attached to its recorded parent

        # Keep parent links accurate so later transforms can find the nodes.
        replacement.each { |n| n.parent = parent if n.respond_to?(:parent=) }
        siblings[index, 1] = replacement
      end
    end

    # Parse the markdown string +s+ and return the resulting document Node.
    def self.parse_string(s)
      Node.new(CMark.cmark_parse_document(s, s.bytesize))
    end

    # Read everything from +f+ (any object responding to +read+) and parse it.
    def self.parse_file(f)
      parse_string(f.read)
    end

    protected

    # Release the C tree behind this node. Safe to call more than once: the
    # pointer is dropped after the first call so it cannot be freed twice.
    def free
      return unless @pointer

      CMark.cmark_free_nodes(@pointer)
      @pointer = nil
    end

    private

    # Wrap each child of the C node in a Node, linked to self as parent.
    def load_children(pointer)
      child_ptr = CMark.cmark_node_first_child(pointer)
      until child_ptr.null?
        child = Node.new(child_ptr)
        child.parent = self
        @children << child
        child_ptr = CMark.cmark_node_next(child_ptr)
      end
    end

    # Copy the string content plus the attributes specific to this node type.
    def load_attributes(pointer)
      @string_content = CMark.cmark_node_get_string_content(pointer)
      case @type
      when :header
        @header_level = CMark.cmark_node_get_header_level(pointer)
      when :list
        @list_type = CMark.cmark_node_get_list_type(pointer)
        @list_start = CMark.cmark_node_get_list_start(pointer)
        @list_tight = CMark.cmark_node_get_list_tight(pointer)
      when :link, :image
        # A missing url/title is normalised to an empty string.
        @url = CMark.cmark_node_get_url(pointer) || ""
        @title = CMark.cmark_node_get_title(pointer) || ""
      end
    end
  end
  # Base class for renderers. It owns the output stream and the generic
  # block/container separator logic; subclasses supply one method per node
  # type (named after the type symbol) that #render dispatches to.
  class Renderer
    attr_accessor :in_tight, :warnings, :in_plain

    # stream - object to write output to (needs +write+ and +printf+). When
    #          omitted, output is collected in a StringIO and returned by
    #          #render for a document node.
    def initialize(stream = nil)
      @stringwriter = !stream
      @stream = stream || StringIO.new
      @need_blocksep = false
      @warnings = Set.new []
      @in_tight = false
      @in_plain = false
    end

    # Write printf-style formatted output to the stream.
    def outf(format, *args)
      @stream.printf(format, *args)
    end

    # Write each argument to the stream: nodes are rendered, arrays are
    # written element by element, anything else is passed to the stream's
    # +write+ as is.
    def out(*args)
      args.each do |arg|
        case arg
        when Node then render(arg)
        when Array then arg.each { |x| out(x) }
        else @stream.write(arg)
        end
      end
    end

    # Render +node+.
    #
    # A :document node renders its children followed by a newline and, when
    # the renderer writes to its own StringIO, returns the accumulated string.
    # While in plain mode, every node other than :str and :softbreak is only
    # traversed for its children. All other nodes are dispatched to the method
    # named after their type.
    #
    # Raises NoMethodError (after recording a warning in #warnings) when no
    # method exists for the node's type. Only the node whose type is actually
    # unhandled gets a warning; errors raised inside a handler propagate
    # without being misreported as "not implemented".
    def render(node)
      @node = node
      if node.type == :document
        document(node)
        out("\n")
        return @stream.string if @stringwriter
      elsif in_plain && node.type != :str && node.type != :softbreak
        # pass through looking for str, softbreak
        node.children.each { |child| render(child) }
      elsif respond_to?(node.type, true)
        send(node.type, node)
      else
        unimplemented(node)
      end
    end

    # Render the children of the document node.
    def document(node)
      out(node.children)
    end

    # Code blocks have no generic rendering; subclasses must override this.
    # The base implementation reports the node type as not implemented.
    def code_block(node)
      unimplemented(node)
    end

    # Reference definitions produce no output.
    def reference_def(node)
    end

    # Separator written between two consecutive blocks.
    def blocksep
      out("\n")
    end

    # Separator written just inside a container's delimiters; suppressed
    # while rendering a tight list.
    def containersep
      out("\n") unless in_tight
    end

    # Run +blk+ as one block-level element, writing a block separator first
    # if a previous block has been written in the current container.
    def block(&blk)
      blocksep if @need_blocksep
      blk.call
      @need_blocksep = true
    end

    # Write +starter+, run +blk+ for the contents, then write +ender+, with
    # container separators inside the delimiters. The first block inside the
    # container is not preceded by a block separator.
    def container(starter, ender, &blk)
      out(starter)
      containersep
      @need_blocksep = false
      blk.call
      containersep
      out(ender)
    end

    # Run +blk+ in plain mode and restore the previous mode afterwards, even
    # if the block raises.
    def plain(&blk)
      old_in_plain = @in_plain
      @in_plain = true
      blk.call
    ensure
      @in_plain = old_in_plain
    end

    private

    # Record a warning for a node type that has no renderer method and raise.
    def unimplemented(node)
      @warnings.add("WARNING: " + node.type.to_s + " not implemented.")
      raise NoMethodError,
            "#{self.class} does not implement rendering of #{node.type} nodes"
    end
  end
  # Renderer that emits HTML. Each method renders the node type it is named
  # after; block-level types go through Renderer#block / Renderer#container
  # so that separators are placed consistently.
  class HtmlRenderer < Renderer
    # URI.escape was removed in Ruby 3.0; the RFC 2396 parser still provides
    # the same escaping.
    URL_ESCAPER = URI::RFC2396_Parser.new

    # <hN>...</hN> using the node's header level.
    def header(node)
      block do
        out("<h", node.header_level, ">", node.children,
            "</h", node.header_level, ">")
      end
    end

    # Paragraph; inside a tight list the <p> wrapper is omitted.
    def paragraph(node)
      block do
        if in_tight
          out(node.children)
        else
          out("<p>", node.children, "</p>")
        end
      end
    end

    # Bullet lists become <ul>; every other list type becomes <ol>, with a
    # start attribute when the list does not start at 1. The tight/loose
    # state of the enclosing list is restored afterwards, even on error.
    def list(node)
      old_in_tight = in_tight
      self.in_tight = node.list_tight
      block do
        if node.list_type == :bullet_list
          container("<ul>", "</ul>") { out(node.children) }
        else
          start_attr = node.list_start == 1 ? "" : %( start="#{node.list_start}")
          container("<ol#{start_attr}>", "</ol>") { out(node.children) }
        end
      end
    ensure
      self.in_tight = old_in_tight
    end

    # <li> container around the item's children.
    def list_item(node)
      block do
        container("<li>", "</li>") { out(node.children) }
      end
    end

    # <blockquote> container around the quoted blocks.
    def blockquote(node)
      block do
        container("<blockquote>", "</blockquote>") { out(node.children) }
      end
    end

    # Horizontal rule.
    def hrule(node)
      block { out("<hr />") }
    end

    # Code block with HTML-escaped content.
    def code_block(node)
      block do
        out("<pre><code>")
        out(CGI.escapeHTML(node.string_content))
        out("</code></pre>")
      end
    end

    # Raw HTML block: written through unescaped.
    def html(node)
      block { out(node.string_content) }
    end

    # Raw inline HTML: written through unescaped.
    def inline_html(node)
      out(node.string_content)
    end

    # Emphasis.
    def emph(node)
      out("<em>", node.children, "</em>")
    end

    # Strong emphasis.
    def strong(node)
      out("<strong>", node.children, "</strong>")
    end

    # Hyperlink with an optional title attribute.
    def link(node)
      out('<a href="', escape_url(node.url), '"')
      if node.title && node.title.length > 0
        out(' title="', CGI.escapeHTML(node.title), '"')
      end
      out('>', node.children, '</a>')
    end

    # Image; the alt text is the node's children rendered in plain mode.
    def image(node)
      out('<img src="', escape_url(node.url), '"')
      if node.title && node.title.length > 0
        out(' title="', CGI.escapeHTML(node.title), '"')
      end
      plain do
        out(' alt="', node.children, '" />')
      end
    end

    # Literal text, HTML-escaped.
    def str(node)
      out(CGI.escapeHTML(node.string_content))
    end

    # Inline code span with HTML-escaped content.
    def code(node)
      out("<code>")
      out(CGI.escapeHTML(node.string_content))
      out("</code>")
    end

    # Hard line break: a <br/> tag followed by the soft-break newline.
    def linebreak(node)
      out("<br/>")
      softbreak(node)
    end

    # Soft line break: a plain newline.
    def softbreak(node)
      out("\n")
    end

    private

    # Percent-escape +url+, then HTML-escape the result so characters such as
    # "&" and "'" cannot break out of the surrounding attribute value.
    def escape_url(url)
      CGI.escapeHTML(URL_ESCAPER.escape(url))
    end
  end
  # Parse the markdown supplied through ARGF into a document tree.
  doc = Node.parse_file(ARGF)

  # Report the destination of every link in the document.
  doc.walk do |node|
    next unless node.type == :link

    $stdout.write(format("URL = %s\n", node.url))
  end

  # Upper-case all literal text found underneath headers.
  doc.walk do |node|
    next unless node.type == :header

    node.walk do |subnode|
      next unless subnode.type == :str

      subnode.string_content = subnode.string_content.upcase
    end
  end

  # Replace each link by its own children, i.e. turn links into regular text.
  doc.transform { |node| node.children if node.type == :link }

  # Render the resulting tree as HTML on standard output.
  renderer = HtmlRenderer.new(STDOUT)
  renderer.render(doc)

  # Report anything the renderer could not handle, one warning per line.
  renderer.warnings.each { |warning| STDERR.write("#{warning}\n") }
  345. # def markdown_to_html(s)
  346. # len = s.bytes.length
  347. # CMark::cmark_markdown_to_html(s, len)
  348. # end
  349. # print markdown_to_html(STDIN.read())