summaryrefslogtreecommitdiff
path: root/IkiWiki/Plugin/pandoc.pm
blob: 25081ef1207d1203637f3f345a2e688bb881d82d (plain)
  1. #!/usr/bin/env perl
  2. package IkiWiki::Plugin::pandoc;
  3. use warnings;
  4. use strict;
  5. use IkiWiki;
  6. use FileHandle;
  7. use IPC::Open2;
  8. use File::Path qw/make_path/;
  9. use JSON;
  # Extra export formats that can be generated next to the HTML page.
  # A page activates one with 'generate_$format' in its meta block, or turns
  # all of them on at once with 'generate_all_formats'.
  #
  # Each row is: name, file extension, link label, pandoc output format,
  # followed by any extra pandoc arguments for that format.  The 'order'
  # field is the 1-based position of the row in this table.
  my %extra_formats = do {
      my @rows = (
          [ pdf      => 'pdf',           'PDF',      'latex' ],
          [ docx     => 'docx',          'DOCX',     'docx' ],
          [ odt      => 'odt',           'ODT',      'odt' ],
          [ beamer   => 'beamer.pdf',    'Beamer',   'beamer' ],
          [ revealjs => 'revealjs.html', 'RevealJS', 'revealjs', '--self-contained' ],
          [ epub     => 'epub',          'EPUB',     'epub3' ],
          [ latex    => 'tex',           'LaTeX',    'latex', '--standalone' ],
      );
      my $position = 0;
      map {
          my ($name, $file_ext, $label, $pandoc_format, @extra) = @$_;
          ($name => {
              ext    => $file_ext,
              label  => $label,
              format => $pandoc_format,
              extra  => [@extra],
              order  => ++$position,
          });
      } @rows;
  };
  # Registers this plugin's hooks: the getsetup/pagetemplate/pageactions
  # hooks under the id "pandoc", one htmlize hook per configured Markdown
  # extension, and one htmlize hook for each optional input format that is
  # switched on in %config.
  sub import {
      # pandoc_markdown_ext may hold a single extension, a comma-separated
      # string of extensions, or an arrayref of extensions.
      my $configured = $config{pandoc_markdown_ext} || "mdwn";
      my @markdown_exts =
            ref $configured eq 'ARRAY' ? @$configured
          : $configured =~ /,/         ? split(/\s*,\s*/, $configured)
          :                              ($configured);

      my @plugin_hooks = (
          getsetup     => \&getsetup,
          pagetemplate => \&pagetemplate,
          pageactions  => \&pageactions,
      );
      while (my ($type, $callback) = splice(@plugin_hooks, 0, 2)) {
          hook(type => $type, id => "pandoc", call => $callback);
      }

      foreach my $md_ext (@markdown_exts) {
          hook(type => "htmlize", id => $md_ext,
              call => sub { htmlize("markdown", @_) });
      }

      # Optional input formats: [ config switch, file extension, pandoc format ].
      my @optional_inputs = (
          [ pandoc_latex     => 'tex',       'latex' ],
          [ pandoc_rst       => 'rst',       'rst' ],
          [ pandoc_textile   => 'textile',   'textile' ],
          [ pandoc_mediawiki => 'mediawiki', 'mediawiki' ],
          [ pandoc_opml      => 'opml',      'opml' ],
          [ pandoc_org       => 'org',       'org' ],
      );
      foreach my $input (@optional_inputs) {
          my ($switch, $file_ext, $pandoc_format) = @$input;
          next unless $config{$switch};
          hook(type => "htmlize", id => $file_ext,
              call => sub { htmlize($pandoc_format, @_) });
      }
  }
  # getsetup hook: returns the plugin's setup description as a flat list of
  # (setting name => attribute hashref) pairs, in display order.
  #
  # Three local constructors cover the recurring attribute combinations; the
  # few settings that fit none of them are written out in full.
  sub getsetup () {
      # String setting with safe => 1; the caller chooses the rebuild flag.
      my $string = sub {
          my ($example, $description, $rebuild) = @_;
          return {
              type        => "string",
              example     => $example,
              description => $description,
              safe        => 1,
              rebuild     => $rebuild,
          };
      };
      # Boolean setting with safe => 1 and rebuild => 1.
      my $flag = sub {
          my ($example, $description) = @_;
          return {
              type        => "boolean",
              example     => $example,
              description => $description,
              safe        => 1,
              rebuild     => 1,
          };
      };
      # Internal list of extra pandoc options; a fresh empty default per call.
      my $options = sub {
          my ($description) = @_;
          return {
              type        => "internal",
              default     => [],
              description => $description,
              safe        => 0,
              rebuild     => 0,
          };
      };

      return
          plugin => {
              safe => 1,
              rebuild => 1,
          },
          pandoc_command => $string->("/usr/local/bin/pandoc",
              "Path to pandoc executable", 0),
          pandoc_citeproc => $string->("/usr/local/bin/pandoc-citeproc",
              "Path to pandoc-citeproc executable", 0),
          pandoc_markdown_ext => $string->("mdwn,md,markdown",
              "File extension(s) for Markdown files handled by Pandoc", 1),
          pandoc_latex => $flag->(0,
              "Enable Pandoc processing of LaTeX documents (extension=tex)"),
          pandoc_rst => $flag->(0,
              "Enable Pandoc processing of reStructuredText documents (extension=rst)"),
          pandoc_textile => $flag->(0,
              "Enable Pandoc processing of Textile documents (extension=textile)"),
          pandoc_mediawiki => $flag->(0,
              "Enable Pandoc processing of MediaWiki documents (extension=mediawiki)"),
          pandoc_org => $flag->(0,
              "Enable Pandoc processing of Emacs org-mode documents (extension=org)"),
          pandoc_opml => $flag->(0,
              "Enable Pandoc processing of OPML documents (extension=opml)"),
          pandoc_smart => $flag->(1,
              "Use smart quotes, dashes, and ellipses"),
          pandoc_obfuscate => $flag->(1,
              "Obfuscate emails"),
          pandoc_html5 => $flag->(0,
              "Generate HTML5"),
          pandoc_ascii => $flag->(0,
              "Generate ASCII instead of UTF8"),
          pandoc_html_extra_options => $options->(
              "List of extra pandoc options for html"),
          pandoc_numsect => $flag->(0,
              "Number sections"),
          pandoc_sectdiv => $flag->(0,
              "Attach IDs to section DIVs instead of Headers"),
          pandoc_codeclasses => $string->("",
              "Classes to use for indented code blocks", 1),
          # The names listed here are the ones htmlize accepts; the option is
          # spelled "jsmath" there, not "mathjs".
          pandoc_math => $string->("mathjax",
              "How to process TeX math (mathjax, katex, mathml, jsmath, latexmathml, asciimathml, mimetex, webtex)", 1),
          pandoc_math_custom_js => $string->("",
              "Link to local/custom javascript for math (or to server-side script for mimetex and webtex)", 1),
          pandoc_math_custom_css => $string->("",
              "Link to local/custom CSS for math (requires appropriate pandoc_math setting)", 1),
          pandoc_bibliography => $string->("",
              "Path to default bibliography file", 1),
          pandoc_csl => $string->("",
              "Path to CSL file (for references and bibliography)", 1),
          pandoc_csl_default_lang => $string->("",
              "Default language code (RFC 1766) for citations processing", 1),
          pandoc_filters => $string->("",
              "A comma-separated list of custom pandoc filters", 1),
          pandoc_latex_template => $string->("",
              "Path to pandoc template for LaTeX and normal PDF output", 0),
          pandoc_latex_extra_options => $options->(
              "List of extra pandoc options for LaTeX (and PDF) generation"),
          pandoc_beamer_template => $string->("",
              "Path to pandoc template for Beamer PDF output", 0),
          pandoc_beamer_extra_options => $options->(
              "List of extra pandoc options for Beamer PDF generation"),
          # The only boolean that is neither safe nor rebuild-triggering.
          pandoc_pdf_export_cleanup => {
              type => "boolean",
              example => 0,
              description => "Whether to clean up LaTeX auxiliary files after PDF generation",
              safe => 0,
              rebuild => 0,
          },
          pandoc_revealjs_template => $string->("",
              "Path to pandoc template for Reveal.js slides output", 0),
          pandoc_revealjs_extra_options => $options->(
              "List of extra pandoc options for Reveal.js slides generation"),
          pandoc_docx_template => $string->("",
              "Path to pandoc template for MS Word (docx) output", 0),
          pandoc_docx_extra_options => $options->(
              "List of extra pandoc options for DOCX generation"),
          pandoc_odt_template => $string->("",
              "Path to pandoc template for OpenDocument (odt) output", 0),
          pandoc_odt_extra_options => $options->(
              "List of extra pandoc options for ODT generation"),
          pandoc_epub_template => $string->("",
              "Path to pandoc template for EPUB3 output", 0),
          pandoc_epub_extra_options => $options->(
              "List of extra pandoc options for EPUB3 generation");
  }
  # htmlize hook callback: renders one page to HTML through pandoc.
  #
  # Arguments: the pandoc input format name (e.g. "markdown"), followed by
  # the named parameters of the hook; 'page' and 'content' are read here.
  #
  # The source is first converted to pandoc's JSON representation so that the
  # document's meta block can be inspected.  Selected meta values are copied
  # into $pagestate{$page}{meta} (mostly under a "pandoc_" prefix), the extra
  # export formats are generated or removed, and finally the JSON is
  # converted to HTML.  Returns the HTML as a decoded string.
  sub htmlize ($@) {
      my $format = shift;
      my %params = @_;
      my $page = $params{page};
      my $htmlformat = 'html';

      local(*PANDOC_IN, *JSON_IN, *JSON_OUT, *PANDOC_OUT);
      my @args = ();

      # The default assumes pandoc is in PATH
      my $command = $config{pandoc_command} || "pandoc";

      if ($config{pandoc_smart}) {
          push @args, '--smart';
      }
      if ($config{pandoc_obfuscate}) {
          push @args, '--email-obfuscation=references';
      } else {
          push @args, '--email-obfuscation=none';
      }
      if ($config{pandoc_html5}) {
          $htmlformat = 'html5';
      }
      if ($config{pandoc_ascii}) {
          push @args, '--ascii';
      }
      if ($config{pandoc_numsect}) {
          push @args, '--number-sections';
      }
      if ($config{pandoc_sectdiv}) {
          push @args, '--section-divs';
      }
      if ($config{pandoc_codeclasses} && ($config{pandoc_codeclasses} ne "")) {
          push @args, '--indented-code-classes=' . $config{pandoc_codeclasses};
      }

      # How to process math. Normally either mathjax or katex.
      my %mathconf = map {($_=>"--$_")} qw(
          jsmath mathjax latexmathml asciimathml mathml katex mimetex webtex
      );
      my %with_urls = qw/mimetex 1 webtex 1/;
      # Declared separately from the match: "my $x = ... if ..." has
      # undefined behaviour according to perlsyn.
      my $mathopt;
      if ($config{pandoc_math} && $config{pandoc_math} =~ /(\w+)/) {
          $mathopt = $1;
      }
      my $custom_js = $config{pandoc_math_custom_js} || '';

      # Cleanup pandoc-prefixed keys from persistent meta.  Only the keys are
      # inspected; values must never be mistaken for key names.
      if (ref $pagestate{$page}{meta} eq 'HASH') {
          my @delkeys = grep { /^pandoc_/ } keys %{ $pagestate{$page}{meta} };
          delete $pagestate{$page}{meta}{$_} for @delkeys;
      }

      if ($mathopt && $mathconf{$mathopt}) {
          if ($with_urls{$mathopt} && $custom_js) {
              # In these cases, the 'custom js' is a misnomer: actually a server-side script
              push @args, $mathconf{$mathopt} ."=". $custom_js;
          } else {
              push @args, $mathconf{$mathopt};
          }
          $pagestate{$page}{meta}{"pandoc_math"} = $mathopt;
          $pagestate{$page}{meta}{"pandoc_math_$mathopt"} = 1;
          $pagestate{$page}{meta}{"pandoc_math_custom_js"} = $custom_js if $custom_js;
      }

      # Convert to intermediate JSON format so that the title block
      # can be parsed out.
      # We must omit the 'bibliography' parameter here, otherwise the list of
      # references will be doubled.
      my $to_json_pid = open2(*JSON_OUT, *PANDOC_OUT, $command,
                      '-f', $format,
                      '-t', 'json',
                      @args, '--normalize');
      error("Unable to open $command") unless $to_json_pid;

      # Workaround for perl bug (#376329)
      require Encode;
      my $content = Encode::encode_utf8($params{content});

      # Protect inline plugin placeholders from being mangled by pandoc:
      $content =~ s{<div class="inline" id="(\d+)"></div>}
                   {::INLINE::PLACEHOLDER::$1::}g;

      print PANDOC_OUT $content;
      close PANDOC_OUT;

      # Read everything pandoc wrote, not just its first line.
      my $json_content = do { local $/; <JSON_OUT> };
      close JSON_OUT;

      waitpid $to_json_pid, 0;

      # Parse the title block out of the JSON and set the meta values.
      # A failed decode is reported with the page name instead of a bare
      # JSON parser message.
      my $decoded_json = eval { decode_json($json_content) };
      if ($@ || !defined $decoded_json) {
          error("pandoc did not produce valid JSON for $page"
              . ($@ ? ": $@" : ''));
      }

      # The representation of the meta block changed in pandoc version 1.18.
      my $meta = undef;
      if (ref $decoded_json eq 'HASH') {
          # post-1.18 version.  The lowercase "meta" key is tried first;
          # 'Meta' is what this code originally looked for and is kept as a
          # fallback.  NOTE(review): confirm the key case against the pandoc
          # versions in use.
          $meta = $decoded_json->{'meta'} || $decoded_json->{'Meta'};
      } elsif (ref $decoded_json eq 'ARRAY' && ref $decoded_json->[0] eq 'HASH') {
          $meta = $decoded_json->[0]->{'unMeta'} || {}; # pre-1.18 version
      }
      unless (ref $meta eq 'HASH') {
          warn "WARNING: Unexpected format for meta block. Incompatible version of Pandoc?\n";
          # Continue with an empty meta block rather than dereferencing
          # something that is not a hash.
          $meta = {};
      }

      # Get some selected meta attributes, more specifically:
      # (title date bibliography csl subtitle abstract summary description
      #  version lang locale references author [+ num_authors primary_author]),
      # as well as some configuration options (generate_*, *_extra_options, *_template).

      my @format_keys = grep { $_ ne 'pdf' } keys %extra_formats;
      my %scalar_meta = map { ($_=>undef) } qw(
          title date bibliography csl subtitle abstract summary
          description version lang locale);
      $scalar_meta{$_.'_template'} = undef for @format_keys;
      my %bool_meta = map { ("generate_$_"=>0) } keys %extra_formats;
      my %list_meta = map { ($_=>[]) } qw/author references/;
      $list_meta{$_.'_extra_options'} = [] for @format_keys;
      my $have_bibl = 0;

      foreach my $k (keys %scalar_meta) {
          next unless $meta->{$k};
          $scalar_meta{$k} = compile_string($meta->{$k}->{c});
          # NB! Note that this is potentially risky, since pagestate is sticky, and
          # we only cleanup the pandoc_* values in {meta}.
          $pagestate{$page}{meta}{$k} = $scalar_meta{$k};
          $pagestate{$page}{meta}{"pandoc_$k"} = $pagestate{$page}{meta}{$k};
      }

      # 'generate_all_formats' is the fallback for every generate_* key, so
      # it is looked up once rather than on each iteration.
      my $gen_all = $meta->{generate_all_formats} || {};
      foreach my $k (keys %bool_meta) {
          next unless $meta->{$k} || $gen_all->{c};
          my $val = $meta->{$k} ? $meta->{$k}->{c} : $gen_all->{c};
          # simplifies matters with JSON::(PP::)Boolean objects
          $val = 1 if $val == 1 || $val eq 'true';
          if (ref $val || $val =~ /^\s*(?:off|no|false|0)\s*$/i) {
              $bool_meta{$k} = 0;
          } else {
              $bool_meta{$k} = 1;
              $pagestate{$page}{meta}{"pandoc_$k"} = 1;
          }
      }

      foreach my $k (keys %list_meta) {
          next unless $meta->{$k};
          $list_meta{$k} = unwrap_c($meta->{$k});
          $list_meta{$k} = [$list_meta{$k}] unless ref $list_meta{$k} eq 'ARRAY';
          $have_bibl = 1 if $k eq 'references';
          $pagestate{$page}{meta}{"pandoc_$k"} = $list_meta{$k};
      }

      # Try to add other keys as scalars, with pandoc_ prefix only.
      foreach my $k (keys %$meta) {
          next if exists $scalar_meta{$k} || exists $list_meta{$k};
          eval {
              $pagestate{$page}{meta}{"pandoc_$k"} = compile_string($meta->{$k}->{c});
          };
      }

      my $num_authors = scalar @{ $list_meta{author} };
      $scalar_meta{num_authors} = $num_authors;
      $pagestate{$page}{meta}{num_authors} = $num_authors;
      if ($num_authors) {
          $scalar_meta{primary_author} = $list_meta{author}->[0];
          $pagestate{$page}{meta}{author} = join(', ', @{$list_meta{author}});
          $pagestate{$page}{meta}{pandoc_primary_author} = $scalar_meta{primary_author};
      }

      # The bibliography may be set in a Meta block in the page or in the .setup file.
      # If both are present, the Meta block has precedence.
      for my $bibl ($scalar_meta{bibliography}, $config{pandoc_bibliography}) {
          if ($bibl) {
              $have_bibl = 1;
              $pagestate{$page}{meta}{pandoc_bibliography} = $bibl;
              push @args, '--bibliography='.$bibl;
              last;
          }
      }
      # Similarly for the CSL file...
      for my $cslfile ($scalar_meta{csl}, $config{pandoc_csl}) {
          if ($cslfile) {
              $pagestate{$page}{meta}{pandoc_csl} = $cslfile;
              push @args, '--csl='.$cslfile;
              last;
          }
      }
      # If a default CSL language is specified, add that to args,
      # (unless it is overridden by meta)
      unless ($scalar_meta{lang} || $scalar_meta{locale}) {
          if ($config{pandoc_csl_default_lang}) {
              push @args, "--metadata=lang:".$config{pandoc_csl_default_lang};
          }
      }
      # Turn on the pandoc-citeproc filter if either global bibliography,
      # local bibliography or a 'references' key in Meta is present.
      if ($have_bibl) {
          my $citeproc = $config{pandoc_citeproc} || 'pandoc-citeproc';
          push @args, "--filter=$citeproc";
      }

      # Other pandoc filters. Note that currently there is no way to
      # configure a filter to run before pandoc-citeproc has done its work.
      if ($config{pandoc_filters}) {
          my @filters = split /\s*,\s*/, $config{pandoc_filters};
          # Get rid of enclosing quotes: the leading AND the trailing one.
          s/^["']|["']$//g for @filters;
          foreach my $filter (@filters) {
              push @args, "--filter=$filter";
          }
      }

      # html_extra_options may be set in Meta block in the page or in the .setup
      # file. If both are present, the Meta block has precedence, even if it is
      # an empty list
      my @html_args = @args;
      my $page_html_opts = ref $meta->{html_extra_options} eq 'HASH'
          ? $meta->{html_extra_options}{c}
          : undef;
      if (ref $page_html_opts eq 'ARRAY') {
          # unwrap_c yields an arrayref, a single scalar, or nothing at all.
          my $unwrapped = unwrap_c($page_html_opts);
          if (ref $unwrapped eq 'ARRAY') {
              push @html_args, @$unwrapped;
          } elsif (defined $unwrapped) {
              push @html_args, $unwrapped;
          }
      } elsif (ref $config{'pandoc_html_extra_options'} eq 'ARRAY') {
          push @html_args, @{$config{'pandoc_html_extra_options'}};
      }

      my $to_html_pid = open2(*PANDOC_IN, *JSON_IN, $command,
                      '-f', 'json',
                      '-t', $htmlformat,
                      @html_args);
      error("Unable to open $command") unless $to_html_pid;

      # Generate the requested extra formats and remove stale files for the
      # formats that are not requested.
      $pagestate{$page}{pandoc_extra_formats} = {};
      foreach my $ext (keys %extra_formats) {
          if ($bool_meta{"generate_$ext"}) {
              export_file($page, $ext, $json_content, $command, @args);
          } else {
              remove_exported_file($page, $ext);
          }
      }

      print JSON_IN $json_content;
      close JSON_IN;
      my @html = <PANDOC_IN>;
      close PANDOC_IN;

      waitpid $to_html_pid, 0;

      $content = Encode::decode_utf8(join('', @html));

      # Reinstate placeholders for inline plugin:
      $content =~ s{::INLINE::PLACEHOLDER::(\d+)::}
                   {<div class="inline" id="$1"></div>}g;

      return $content;
  }
  # pagetemplate hook callback: copies every "pandoc_"-prefixed entry of the
  # page's stored meta hash into the template as a parameter of the same
  # name.  Reads the 'page' and 'template' named parameters.
  sub pagetemplate (@) {
      my %params = @_;
      my $page = $params{page};
      my $template = $params{template};

      # Fetch the value first instead of dereferencing it in place, so that
      # a page without meta data does not get an empty {meta} hash created
      # in %pagestate, and a non-hash value is skipped rather than
      # dereferenced.
      my $meta = $pagestate{$page}{meta};
      return unless ref $meta eq 'HASH';

      foreach my $k (keys %$meta) {
          next unless $k =~ /^pandoc_/;
          $template->param($k => $meta->{$k});
      }
      return;
  }
  # pageactions hook callback: returns one download link (an HTML string) per
  # extra format that has an exported URL recorded for the page, sorted by
  # the format's 'order' value in %extra_formats.  Returns nothing when the
  # page has no pandoc_extra_formats entry.
  sub pageactions {
      my %params = @_;
      my $page = $params{page};

      my $exported = $pagestate{$page}{pandoc_extra_formats};
      return unless $exported;

      my @by_order = sort {
          $extra_formats{$a}->{order} <=> $extra_formats{$b}->{order}
      } keys %{ $exported };

      my @links = map {
          my $url = $exported->{$_};
          my $label = $extra_formats{$_}->{label} || $_;
          # One attribute per line, with a newline before and after the tag.
          join("\n",
              '',
              qq[<a href="$url"],
              q[class="extra-format-link"],
              qq[title="Download $label version of this page"],
              qq[target="_blank">$label</a>],
              '');
      } grep { $exported->{$_} } @by_order;

      return @links;
  }
  # Writes one extra export format of a page by piping the page's pandoc
  # JSON through pandoc.
  #
  # Arguments: page name, format key (a key of %extra_formats), the JSON
  # document, the pandoc command, and the pandoc arguments shared with the
  # HTML conversion.
  #
  # On success the export URL is recorded in
  # $pagestate{$page}{pandoc_extra_formats}{$ext}.  Failures are reported
  # with warn() and do not propagate to the caller.
  sub export_file {
      my ($page, $ext, $json_content, $command, @args) = @_;
      my ($export_path, $export_url) = _export_file_path_and_url($page, $ext);
      # Directory part of the export path; undef when there is none.
      my ($subdir) = $export_path =~ m{(.*)/};
      my @extra_args = @{ $extra_formats{$ext}->{extra} };
      my $eopt = $ext eq 'pdf' ? 'latex' : $ext;

      # Note that template in meta OVERRIDES template in config,
      # while extra_options in meta are ADDED to extra_options in config.
      my $template = $pagestate{$page}{meta}{"pandoc_".$eopt."_template"}
                     || $config{"pandoc_".$eopt."_template"} || '';
      if ($template) {
          push @extra_args, ($ext =~ /^(docx|odt)$/
                             ? "--reference-$ext=$template"
                             : "--template=$template");
      }
      my $conf_extra = $config{"pandoc_".$eopt."_extra_options"};
      my $conf_extra_custom = $pagestate{$page}{meta}{"pandoc_".$eopt."_extra_options"};
      foreach my $cnf ($conf_extra, $conf_extra_custom) {
          if (ref $cnf eq 'ARRAY' && @$cnf) {
              push @extra_args, @$cnf;
          }
      }

      my $pdf_cleanup = 0;
      if (defined $pagestate{$page}{meta}{"pandoc_pdf_export_cleanup"}) {
          $pdf_cleanup = $pagestate{$page}{meta}{"pandoc_pdf_export_cleanup"};
      } elsif ($config{"pandoc_pdf_export_cleanup"}) {
          $pdf_cleanup = 1;
      }

      # If the user has asked for native LaTeX bibliography handling in the
      # extra_args for this export format (using --biblatex or --natbib),
      # some extra care is needed. Among other things, we need an external
      # tool for PDF generation. In this case, $indirect_pdf will be true.
      my %maybe_non_citeproc = qw/latex 1 pdf 1 beamer 1/;
      my $indirect_pdf = 0;
      if ($maybe_non_citeproc{$ext} && grep { /^(?:--biblatex|--natbib)$/ } @extra_args) {
          $indirect_pdf = 1 unless $ext eq 'latex'; # both for pdf and beamer
          @args = grep { ! /--filter=.*pandoc-citeproc/ } @args;
      }

      # latexmk is run from inside the export directory; remember where we
      # started so the working directory can be put back afterwards.
      require Cwd;
      my $start_dir = Cwd::getcwd();
      my $changed_dir = 0;

      # For a failed child process $? carries the exit status; $! only
      # applies when the process could not be started at all ($? == -1).
      my $child_failure = sub {
          return $? == -1 ? "$!" : "exit status " . ($? >> 8);
      };

      eval {
          if ($subdir && !-d $subdir) {
              make_path($subdir) or die "Could not make_path $subdir: $!";
          }
          my $to_format = $extra_formats{$ext}->{format} || $ext;
          my $tmp_export_path = $export_path;
          $tmp_export_path =~ s/\.pdf$/.tex/ if $indirect_pdf;
          open(my $export_fh, "|-",
               $command,
               '-f' => 'json',
               '-t' => $to_format,
               '-o' => $tmp_export_path,
               @args, @extra_args) or die "Could not open pipe for $ext: $!";
          print {$export_fh} $json_content;
          close $export_fh
              or die "Could not close pipe for $ext: "
                  . ($! ? "$!" : $child_failure->());
          if ($indirect_pdf && $tmp_export_path ne $export_path) {
              my @latexmk_args = qw(-quiet -silent);
              if (grep { /xelatex/ } @extra_args) {
                  push @latexmk_args, '-xelatex';
              } elsif (grep { /lualatex/ } @extra_args) {
                  push @latexmk_args, '-lualatex';
              } else {
                  push @latexmk_args, '-pdf';
              }
              die "No export directory for $export_path" unless defined $subdir;
              chdir $subdir or die "Could not chdir to $subdir: $!";
              $changed_dir = 1;
              my ($plain_fn) = $tmp_export_path =~ m{([^/]+)$};
              # Strip only the final ".tex" suffix.
              $plain_fn =~ s/\.tex$//;
              system('latexmk', @latexmk_args, $plain_fn) == 0
                  or die "Could not run latexmk for pdf generation ($export_path): "
                      . $child_failure->();
              if ($pdf_cleanup) {
                  system('latexmk', '-c', '-quiet', '-silent', $plain_fn) == 0
                      or die "Could not run latexmk for cleanup ($export_path): "
                          . $child_failure->();
                  # These files are apparently not cleaned up by latexmk -c.
                  # We are inside the export directory here, so the bare file
                  # name is correct for relative and absolute paths alike.
                  foreach my $suffix ('run.xml', 'bbl') {
                      my $fn = "$plain_fn.$suffix";
                      unlink($fn) if -f $fn;
                  }
              }
          }
          $pagestate{$page}{pandoc_extra_formats}{$ext} = $export_url;
      };
      my $export_error = $@;

      # Put the working directory back whether or not the export succeeded.
      if ($changed_dir && defined $start_dir) {
          chdir $start_dir
              or warn "WARNING: export_file; could not chdir back to $start_dir: $!\n";
      }

      if ($export_error) {
          warn "EXPORT ERROR FOR $page (format: $ext): $export_error\n";
      }
  }
  # Deletes the exported file of the given page and format if it exists as a
  # plain file.  A failed unlink is reported with warn() only.
  sub remove_exported_file {
      my ($page, $ext) = @_;

      # Only the path is needed; the URL that comes with it is ignored.
      my ($stale_file) = _export_file_path_and_url($page, $ext);
      return unless -f $stale_file;

      eval { unlink $stale_file or die "Could not unlink $stale_file: $!" };
      warn "WARNING: remove_exported_file; page=$page, ext=$ext: $@\n" if $@;
  }
  # Computes where an exported file for a page lives.
  #
  # Arguments: page name and format key (a key of %extra_formats; an unknown
  # key is used as the file extension itself).
  # Returns the list (file system path, URL).
  sub _export_file_path_and_url {
      my ($page, $ext) = @_;
      # the html file will end up in "$destdir/$page/index.html",
      # while e.g. a pdf will be in "$destdir/$page/$page_minus_dirs.pdf".
      my $extension = $extra_formats{$ext}->{ext} || $ext;
      my $destdir = $config{destdir} || '.';

      # Last path component of the page name.  Captured by list assignment:
      # "my $x = $1 if ..." has undefined behaviour according to perlsyn.
      my ($page_minus_dirs) = $page =~ m{([^/]*)$};
      $page_minus_dirs ||= 'index';

      my $export_path = "$destdir/$page/$page_minus_dirs.$extension";

      # An unset url is treated as empty, which gives a root-relative URL
      # without "uninitialized value" warnings.
      my $export_url = defined $config{url} ? $config{url} : '';
      $export_url .= "/" unless $export_url =~ m{/$};
      $export_url .= "$page/$page_minus_dirs.$extension";

      return ($export_path, $export_url);
  }
  689. ## compile_string and unwrap_c are used to make the meta data structures
  690. ## easier to work with for perl.
  # Partially represents an item from pandoc's meta data structure as a
  # plain string.
  #
  # Accepts a plain scalar (returned unchanged), a single arrayref of nodes,
  # or a list of nodes.  Nodes are hashrefs with a type in 't' and usually a
  # content value in 'c'.  Handled node types:
  #   Str, MetaString              - the text in 'c'
  #   Space, SoftBreak, LineBreak  - a single space
  #   MetaInlines and the inline wrappers listed in %is_wrapper
  #                                - the compiled text of the nodes in 'c'
  # Anything else contributes nothing.  Returns the concatenated string.
  sub compile_string {
      my @uncompiled = @_;
      return $uncompiled[0] if @uncompiled==1 && !ref($uncompiled[0]);
      @uncompiled = @{$uncompiled[0]} if @uncompiled==1 && ref $uncompiled[0] eq 'ARRAY';

      # Line breaks count as spaces so that text wrapped over several source
      # lines does not lose the gap between words.
      my %is_blank = map { ($_ => 1) } qw(Space SoftBreak LineBreak);
      my %is_text = map { ($_ => 1) } qw(Str MetaString);
      # Types whose 'c' is expected to be a list of further inline nodes.
      my %is_wrapper = map { ($_ => 1) } qw(
          MetaInlines Emph Strong Strikeout Superscript Subscript SmallCaps);

      my $compiled_string = '';
      foreach my $node (@uncompiled) {
          next unless ref $node eq 'HASH';
          my $type = $node->{'t'} || '';
          if ($is_blank{$type}) {
              $compiled_string .= ' ';
          } elsif ($is_text{$type}) {
              $compiled_string .= $node->{c} if defined $node->{c};
          } elsif ($is_wrapper{$type} && ref $node->{c} eq 'ARRAY') {
              # The ref check keeps a malformed node from dying under
              # strict refs.
              my $inner = compile_string(@{ $node->{c} });
              $compiled_string .= $inner if defined $inner;
          }
      }
      return $compiled_string;
  }
  # Unwraps pandoc's MetaLists, MetaInlines, etc. by following 'c' values
  # down to the scalars they contain.
  #
  #   plain scalar          -> returned as is
  #   hashref               -> the unwrapped 'c' value if it is true,
  #                            otherwise nothing
  #   empty arrayref        -> nothing
  #   one-element arrayref  -> the unwrapped element
  #   longer arrayref       -> a joined string when the first element is a
  #                            Str/Space node (handles scalar author
  #                            fields), otherwise an arrayref of the
  #                            unwrapped elements
  #   any other reference   -> nothing
  sub unwrap_c {
      my $container = shift;
      my $kind = ref $container;

      return $container unless $kind;

      if ($kind eq 'HASH') {
          return unwrap_c($container->{c}) if $container->{c};
          return;
      }

      return unless $kind eq 'ARRAY';

      my $count = @$container;
      return if $count == 0;
      return unwrap_c($container->[0]) if $count == 1;

      my $first = $container->[0];
      if (ref $first eq 'HASH' && $first->{t} =~ /^(?:Str|Space)$/) {
          return join('', map { compile_string($_) } @$container);
      }
      return [map {unwrap_c($_)} @$container];
  }
  731. 1;