  1. #!/usr/bin/env ruby
  2. require 'ffi'
  3. require 'stringio'
  4. require 'cgi'
  5. require 'set'
  6. require 'uri'
  # Low-level FFI binding to the cmark C library.
  #
  # Every C function used by the Ruby wrapper classes is attached here as a
  # module function, e.g. CMark.cmark_node_get_type(ptr). Node handles are
  # passed around as raw pointers (typedef'd to :node).
  module CMark
    extend FFI::Library
    ffi_lib ['libcmark', 'cmark']

    typedef :pointer, :node

    # The position of each symbol in these lists is significant: FFI maps the
    # integers returned by the C library onto the symbols in the order given.
    # NOTE(review): the order must agree with the enums of the installed
    # cmark version - confirm against its header.
    enum :node_type, [
      # block-level nodes
      :document, :blockquote, :list, :list_item,
      :code_block, :html, :paragraph,
      :header, :hrule, :reference_def,
      # inline nodes
      :str, :softbreak, :linebreak, :code, :inline_html,
      :emph, :strong, :link, :image
    ]
    enum :list_type, [:no_list, :bullet_list, :ordered_list]

    # C function name => [argument types, return type]
    signatures = {
      # memory management and whole-document entry points
      cmark_free_nodes:               [[:node], :void],
      cmark_node_unlink:              [[:node], :void],
      cmark_markdown_to_html:         [[:string, :int], :string],
      cmark_parse_document:           [[:string, :int], :node],
      # tree navigation
      cmark_node_first_child:         [[:node], :node],
      cmark_node_parent:              [[:node], :node],
      cmark_node_next:                [[:node], :node],
      cmark_node_previous:            [[:node], :node],
      # per-node accessors
      cmark_node_get_type:            [[:node], :node_type],
      cmark_node_get_string_content:  [[:node], :string],
      cmark_node_get_url:             [[:node], :string],
      cmark_node_get_title:           [[:node], :string],
      cmark_node_get_header_level:    [[:node], :int],
      cmark_node_get_list_type:       [[:node], :list_type],
      cmark_node_get_list_start:      [[:node], :int],
      cmark_node_get_list_tight:      [[:node], :bool]
    }

    signatures.each do |c_name, (arg_types, return_type)|
      attach_function c_name, arg_types, return_type
    end
  end
  # Ruby-side copy of one node of a cmark syntax tree.
  #
  # Building the root node eagerly copies the whole native tree (types,
  # children, string content and the type-specific attributes) into Ruby
  # objects. Once a :document node has been copied the native tree is
  # released, so the Ruby tree no longer depends on C memory.
  class Node
    attr_accessor :type, :children, :parent, :string_content, :header_level,
                  :list_type, :list_start, :list_tight, :url, :title

    # Builds a node (and, recursively, all of its descendants) from a native
    # cmark node pointer.
    #
    # pointer - object responding to #null? that CMark functions accept.
    #
    # Raises ArgumentError if the pointer is null. (Returning nil from
    # #initialize does not make Node.new return nil, so the previous early
    # return handed back a half-initialised node whose children were nil.)
    def initialize(pointer)
      raise ArgumentError, 'cannot build a Node from a null pointer' if pointer.null?

      @pointer = pointer
      @type = CMark.cmark_node_get_type(pointer)
      @children = []
      @parent = nil
      begin
        child_ptr = CMark.cmark_node_first_child(pointer)
        until child_ptr.null?
          child = Node.new(child_ptr)
          child.parent = self
          @children << child
          child_ptr = CMark.cmark_node_next(child_ptr)
        end

        @string_content = CMark.cmark_node_get_string_content(pointer)

        case @type
        when :header
          @header_level = CMark.cmark_node_get_header_level(pointer)
        when :list
          @list_type = CMark.cmark_node_get_list_type(pointer)
          @list_start = CMark.cmark_node_get_list_start(pointer)
          @list_tight = CMark.cmark_node_get_list_tight(pointer)
        when :link, :image
          # A missing url/title is normalised to the empty string.
          @url = CMark.cmark_node_get_url(pointer) || ''
          @title = CMark.cmark_node_get_title(pointer) || ''
        end
      ensure
        # Everything has been copied into Ruby objects by now; release the
        # native tree even if copying failed part-way so it cannot leak.
        free if @type == :document
      end
    end

    # An iterator that "walks the tree," yielding this node and then every
    # descendant in document order (parents before their children).
    #
    # Each child list is snapshotted before it is iterated, so the block may
    # add, remove or replace nodes in the tree without siblings being skipped
    # or visited twice. Without a block an Enumerator is returned.
    #
    # Returns this node's children array.
    def walk(&blk)
      return enum_for(:walk) unless blk

      blk.call(self)
      children.dup.each { |child| child.walk(&blk) }
      children
    end

    # Walk the tree and transform it. blk should take one argument,
    # a node. If its value is a node, that node replaces the node being
    # visited. If its value is an array of nodes, those nodes are spliced
    # in place of the node being visited (so, to delete a node, use an
    # empty array). Otherwise the node is left as it is.
    #
    # Spliced-in nodes get their parent set to the new parent, so a later
    # replacement of one of them edits the live tree. The descendants of a
    # replaced node are still visited. The root (a node without a parent)
    # cannot be replaced; a replacement requested for it is ignored.
    def transform(&blk)
      walk do |node|
        result = blk.call(node)
        replacement =
          case result
          when Array then result
          when Node then [result]
          end
        next if replacement.nil?

        parent = node.parent
        next if parent.nil? # at the document root, just skip

        siblings = parent.children
        position = siblings.index { |sibling| sibling.equal?(node) }
        next if position.nil? # node is no longer attached to its parent

        siblings[position, 1] = replacement
        replacement.each { |new_node| new_node.parent = parent if new_node.kind_of?(Node) }
      end
    end

    # Parses the markdown string s and returns the root Node of its tree.
    def self.parse_string(s)
      Node.new(CMark.cmark_parse_document(s, s.bytesize))
    end

    # Reads all of f (any object responding to #read) and parses it.
    def self.parse_file(f)
      parse_string(f.read)
    end

    protected

    # Releases the native tree rooted at this node. Safe to call repeatedly:
    # the stored pointer is cleared after the first call so it can never be
    # handed to the C library twice.
    def free
      return if @pointer.nil?

      CMark.cmark_free_nodes(@pointer)
      @pointer = nil
    end
  end
  # Base class for tree renderers.
  #
  # A renderer writes to the stream given to the constructor, or to an
  # internal StringIO when none is given. Rendering dispatches on the node
  # type: a node of type :foo is handled by the instance method #foo, which
  # subclasses are expected to define for every node type they support.
  class Renderer
    attr_accessor :in_tight, :warnings, :in_plain

    # stream - optional object responding to #write. When omitted, output is
    #          collected in memory and returned by #render for a document.
    def initialize(stream = nil)
      @stringwriter = !stream
      @stream = stream || StringIO.new
      @need_blocksep = false
      @warnings = Set.new []
      @in_tight = false
      @in_plain = false
    end

    # Writes each argument to the output: strings are written as they are,
    # nodes are rendered, arrays are flattened recursively, and anything else
    # (e.g. an integer header level) is handed to the stream's #write.
    def out(*args)
      args.each do |arg|
        case arg
        when String then @stream.write(arg)
        when Node then render(arg)
        when Array then arg.each { |item| out(item) }
        else @stream.write(arg)
        end
      end
    end

    # Renders node and everything below it.
    #
    # For a :document node the accumulated string is returned when the
    # renderer writes to its own internal buffer. While #in_plain is set, only
    # :str and :softbreak nodes are rendered; other nodes are passed through
    # to reach the text inside them.
    #
    # Raises NoMethodError when no handler exists for the node's type. In that
    # case a warning naming the type is also recorded in #warnings. The
    # warning is only recorded for the handler that is actually missing, not
    # for the enclosing handlers the exception propagates through, nor for an
    # unrelated NoMethodError raised inside a handler.
    def render(node)
      @node = node
      if node.type == :document
        document(node)
        out("\n")
        return @stream.string if @stringwriter
      elsif in_plain && node.type != :str && node.type != :softbreak
        # pass through looking for str, softbreak
        node.children.each { |child| render(child) }
      else
        begin
          send(node.type, node)
        rescue NoMethodError => e
          if e.name.to_s == node.type.to_s
            @warnings.add("WARNING: " + node.type.to_s + " not implemented.")
          end
          raise
        end
      end
    end

    # Renders the children of the document root.
    def document(node)
      out(node.children)
    end

    # Code blocks have no format-independent rendering; subclasses must
    # override this. The base version used to call itself and overflow the
    # stack; it now fails like any other unimplemented node type.
    def code_block(node)
      raise NoMethodError.new("code_block is not implemented by #{self.class}", :code_block)
    end

    # Reference definitions produce no output.
    def reference_def(node)
    end

    # Separator written between two consecutive blocks.
    def blocksep
      out("\n")
    end

    # Separator written just inside a container's delimiters; suppressed
    # while rendering a tight list.
    def containersep
      out("\n") unless in_tight
    end

    # Runs blk as one block-level element, preceded by a block separator if
    # another block was written before it at the same level.
    def block(&blk)
      blocksep if @need_blocksep
      blk.call
      @need_blocksep = true
    end

    # Writes starter and ender around whatever blk emits, with container
    # separators just inside both delimiters. The first block inside the
    # container is not preceded by a block separator.
    def container(starter, ender, &blk)
      out(starter)
      containersep
      @need_blocksep = false
      blk.call
      containersep
      out(ender)
    end

    # Runs blk with plain-text mode switched on, restoring the previous mode
    # afterwards even if blk raises.
    def plain(&blk)
      previous = @in_plain
      @in_plain = true
      blk.call
    ensure
      @in_plain = previous
    end
  end
  # Renders a node tree as HTML. Each method handles the node type of the
  # same name and is reached through Renderer#render.
  class HtmlRenderer < Renderer
    # <hN>...</hN>, where N is the node's header level.
    def header(node)
      block do
        out("<h", node.header_level, ">", node.children,
            "</h", node.header_level, ">")
      end
    end

    # Paragraphs inside a tight list are written without the <p> wrapper.
    def paragraph(node)
      block do
        if in_tight
          out(node.children)
        else
          out("<p>", node.children, "</p>")
        end
      end
    end

    # Bullet lists become <ul>; every other list type becomes <ol>, with a
    # start attribute when numbering does not begin at 1. The tightness of the
    # list applies while its items are rendered and the previous setting is
    # restored afterwards, even if rendering raises.
    def list(node)
      old_in_tight = in_tight
      self.in_tight = node.list_tight
      block do
        if node.list_type == :bullet_list
          container("<ul>", "</ul>") { out(node.children) }
        else
          start = node.list_start == 1 ? '' : " start=\"#{node.list_start}\""
          # The opening tag has to be assembled here; passing only the start
          # attribute as the container opener lost the "<ol" and ">".
          container("<ol#{start}>", "</ol>") { out(node.children) }
        end
      end
    ensure
      self.in_tight = old_in_tight
    end

    def list_item(node)
      block do
        container("<li>", "</li>") { out(node.children) }
      end
    end

    def blockquote(node)
      block do
        container("<blockquote>", "</blockquote>") { out(node.children) }
      end
    end

    def hrule(node)
      block { out("<hr />") }
    end

    # The block's literal text, HTML-escaped, inside <pre><code>.
    def code_block(node)
      block do
        out("<pre><code>")
        out(escape_html(node.string_content))
        out("</code></pre>")
      end
    end

    # Raw HTML blocks are passed through unescaped.
    def html(node)
      block { out(node.string_content) }
    end

    # Raw inline HTML is passed through unescaped.
    def inline_html(node)
      out(node.string_content)
    end

    def emph(node)
      out("<em>", node.children, "</em>")
    end

    def strong(node)
      out("<strong>", node.children, "</strong>")
    end

    def link(node)
      out('<a href="', escape_url(node.url), '"')
      out_title_attribute(node)
      out('>', node.children, '</a>')
    end

    # The alt text is the image's description rendered in plain mode, i.e.
    # only the text of its descendants without any markup.
    def image(node)
      out('<img src="', escape_url(node.url), '"')
      out_title_attribute(node)
      plain do
        out(' alt="', node.children, '" />')
      end
    end

    def str(node)
      out(escape_html(node.string_content))
    end

    def code(node)
      out("<code>")
      out(escape_html(node.string_content))
      out("</code>")
    end

    # A hard line break is a <br/> followed by the newline of a soft break.
    def linebreak(node)
      out("<br/>")
      softbreak(node)
    end

    def softbreak(node)
      out("\n")
    end

    private

    # Writes a title="..." attribute when the node has a non-empty title.
    def out_title_attribute(node)
      return unless node.title && node.title.length > 0

      out(' title="', escape_html(node.title), '"')
    end

    # Percent-encodes characters that are not allowed in a URI and then
    # HTML-escapes the result so it is safe inside a double-quoted attribute
    # (a bare "&" in a query string becomes "&amp;").
    #
    # URI.escape was removed in Ruby 3.0; the default parser's #escape is the
    # same routine and is available on old and new Rubies alike.
    def escape_url(url)
      CGI.escapeHTML(URI::DEFAULT_PARSER.escape(url.to_s))
    end

    # HTML-escapes text, treating nil as the empty string.
    def escape_html(text)
      CGI.escapeHTML(text.to_s)
    end
  end
  # Demo driver: parse markdown from ARGF (the files named on the command
  # line, or standard input), run three passes over the tree, then write the
  # result as HTML to standard output.
  doc = Node.parse_file(ARGF)

  # Pass 1: report the destination of every link.
  doc.walk do |node|
    next unless node.type == :link

    print "URL = #{node.url}\n"
  end

  # Pass 2: upper-case all text that appears inside a header.
  doc.walk do |node|
    next unless node.type == :header

    node.walk do |subnode|
      subnode.string_content = subnode.string_content.upcase if subnode.type == :str
    end
  end

  # Pass 3: turn links into regular text by splicing each link's children
  # into the place the link occupied.
  doc.transform { |node| node.children if node.type == :link }

  renderer = HtmlRenderer.new(STDOUT)
  renderer.render(doc)

  # Report anything the renderer flagged, one warning per line.
  renderer.warnings.each { |warning| STDERR.write("#{warning}\n") }
  342. # def markdown_to_html(s)
  343. # len = s.bytes.length
  344. # CMark::cmark_markdown_to_html(s, len)
  345. # end
  346. # print markdown_to_html(STDIN.read())