diff options
authorJonas Smedegaard <>2014-12-22 09:16:35 +0100
committerJonas Smedegaard <>2014-12-22 09:16:35 +0100
commitf8955236b96711e609e3f2eb5d95998de4812755 (patch)
parent393da4bf5eac7b21cfd92406034d297a10e3cae2 (diff)
Add section and bibliography filters.
4 files changed, 132 insertions, 3 deletions
diff --git a/Makefile b/Makefile
index d1f0ca5..1cd26e5 100644
--- a/Makefile
+++ b/Makefile
@@ -5,6 +5,9 @@ source_basename = EUT/2nd-edition
# work around in mediawiki bug parsing blockquote tags on same line
re_blockquote = s|\n*(</?blockquote>)\n*|\n$$1\n|g
+export PANDOC_CITEPROC_FILE = $(stem).bib
+#PANDOC_CITEPROC_FILE = $(stem).bib
all: $(stem).pdf
@@ -21,7 +24,8 @@ $(stem).mediawiki: $(stem).raw
$(stem).pdf: $(stem).mediawiki template.tex
pandoc -f mediawiki --template=template.tex --latex-engine=xelatex \
- --toc \
+ --filter ./pandoc-filter-sections --filter ./pandoc-filter-bib \
+ --bibliography=$(PANDOC_CITEPROC_FILE) \
-V papersize=a4paper -V fontsize=11pt \
-V documentclass=memoir -V chapterstyle=demo3 \
-V classoption=oneside -V classoption=titlepage -V classoption=twocolumn \
diff --git a/TODO b/TODO
index 2c44301..8aa6c97 100644
--- a/TODO
+++ b/TODO
@@ -14,8 +14,10 @@ Input (last checked 2014-12-21 22-39):
* Handle draft/final switch, and fail on warnings when in draft mode.
- * Use pandoc filter for bugfixing (not multiple passes + perl regex).
- * Decode and properly handle bibliography (i.e. {{cite ...}} markup).
+ * Fix section filter to avoid eating subsequent content.
+ * Use Header (not latex RawBlock) as header markup in section filter.
+ * Refine bibliography regarding web sources and timestamp markup.
+ <>
Content modelling:
* Favor british at all uses of "english" documentclass hint.
diff --git a/pandoc-filter-bib b/pandoc-filter-bib
new file mode 100755
index 0000000..27ba8d4
--- /dev/null
+++ b/pandoc-filter-bib
@@ -0,0 +1,68 @@
+use warnings;
+use strict;
+use v5.10.1; # use switch keyword "when" (but avoid too complex "given")
+no if $] >= 5.018, warnings => "experimental::smartmatch";
+use Pandoc::Filter;
+use Pandoc::Elements;
+use Path::Tiny;
+use List::Util qw(pairmap);
+my $bibfilename = $ENV{'PANDOC_CITEPROC_FILE'} || 'bibliography.bib';
+my $bibfile = path($bibfilename);
+my (@a, $i);
+# Filter action: turn MediaWiki template markup that reached the document
+# as RawInline elements into citations, collecting one BibTeX record per
+# {{cite ...}} template in @a.
+#
+# Argument: the element being visited.  Elements other than RawInline of
+# format "mediawiki" are skipped by returning nothing.
+# Returns: a Cite element for {{cite TYPE|key=value|...}}, the words
+# "citation needed" for {{citation needed}}, or an empty Str for any other
+# raw markup (which is reported on STDERR and dropped).
+pandoc_filter sub {
+    my $self = shift;
+    return unless ($self->name eq 'RawInline' and $self->format eq 'mediawiki');
+    for ($self->content) {
+        # Braces are escaped so they are unambiguously literal; unescaped
+        # literal "{" draws deprecation warnings or errors on some perls.
+        when (/^\{\{cite\s+(\w+)\s*\|([^}]*)\}\}$/) {
+            # Copy the captures before any nested pattern match runs.
+            my ($type, $fields) = ($1, $2);
+            my $id = 'ref'.++$i;
+            # Each "key = value" field becomes key="value" with embedded
+            # double quotes backslash-escaped; fields not matching that
+            # shape contribute nothing.
+            my @data = pairmap { $b =~ s/"/\\"/g; "$a=\"$b\"" }
+                map { /^\s*(\w+)\s*=\s*"?(.*?)"?\s*$/ }
+                split( /\|/, $fields );
+            push @a, join ",\n",
+                '@'.$type.'{'.$id,
+                @data,
+                '}';
+            return Cite(
+                [{
+                    'citationId' => $id,
+                    'citationPrefix' => [],
+                    'citationSuffix' => [],
+                    'citationMode' => {
+                        t => 'NormalCitation',
+                        c => [],
+                    },
+                    'citationNoteNum' => 0,
+                    'citationHash' => 0,
+                }],
+                [ Str $id ],
+            );
+        }
+        when (/^\{\{citation needed\}\}$/) {
+            say STDERR "WARNING: Mediawiki citation needed.";
+            return [
+                Str "citation",
+                Space,
+                Str "needed",
+            ];
+        }
+        default {
+            # $_ is the raw markup (aliased by the enclosing "for").  The
+            # element itself was shifted off @_ above, so $_[0] must not
+            # be used to reach it.
+            say STDERR "WARNING: Mediawiki unknown data skipped: "
+                . '"' . $_ . '"';
+            return Str "";
+        }
+    }
+END { # END block: executed when the interpreter exits
+ $bibfile->spew_utf8( join "\n\n", @a ) # write the collected records, separated by blank lines, to $bibfile
+ if (@a); # skip writing when no record was collected
diff --git a/pandoc-filter-sections b/pandoc-filter-sections
new file mode 100755
index 0000000..f0bb5a5
--- /dev/null
+++ b/pandoc-filter-sections
@@ -0,0 +1,55 @@
+use warnings;
+use strict;
+use Pandoc::Filter;
+use Pandoc::Elements;
+# FIXME: avoid eating content past tweaked headers
+ \&frontmatter,
+ \&mainmatter,
+ \&backmatter,
+ \&toc,
+# FIXME: use Header (not latex RawBlock) - why does it hang?!?
+sub header { # build the chapter heading block for the given title
+ my $label = shift; # heading text
+# Header( 1, attributes {}, [ Str $label ], );
+ RawBlock( 'latex', '\\chapter{'.$label.'}' ); # last expression is the return value; $label is concatenated into the LaTeX as-is
+sub frontmatter { # action: open the LaTeX front matter at the "About" header
+ my $self = shift; # element being visited
+ return [ RawBlock( 'latex', '\\frontmatter' ), header('About'), ] # \frontmatter followed by an "About" chapter heading
+ if ( $self->name eq 'Header' and $self->level >= 1 # a Header element; ">= 1" admits every level this test sees
+ and stringify($self) eq 'About' ); # whose text is exactly "About"
+ return; # no match: return nothing (presumably leaves the element as it is - confirm against Pandoc::Filter)
+sub mainmatter { # action: open the LaTeX main matter at the "Scope..." header
+ my $self = shift; # element being visited
+ return [ RawBlock( 'latex', '\\mainmatter' ), header(stringify($self)), ] # \mainmatter followed by a chapter heading carrying the header's own text
+ if ( $self->name eq 'Header' and $self->level == 1 # level-1 headers only
+ and stringify($self) =~ /^Scope/ ); # whose text starts with "Scope"
+ return; # no match: return nothing (presumably leaves the element as it is - confirm against Pandoc::Filter)
+sub backmatter { # action: open the LaTeX back matter at the "Notes..." header
+ my $self = shift; # element being visited
+ return [ RawBlock( 'latex', '\\backmatter' ), header('References'), ] # \backmatter followed by a chapter heading titled "References"
+ if ( $self->name eq 'Header' and $self->level == 1 # level-1 headers only
+ and stringify($self) =~ /^Notes/ ); # whose text starts with "Notes"
+ return; # no match: return nothing (presumably leaves the element as it is - confirm against Pandoc::Filter)
+sub toc { # action: swap the "Table..." header for a LaTeX table of contents
+ my $self = shift; # element being visited
+ return unless ( $self->name eq 'Header' and $self->level == 1 # level-1 headers only
+ and stringify($self) =~ /^Table/ ); # whose text starts with "Table"
+ return RawBlock( 'latex', # raw LaTeX: black link colour, tocdepth 3, then \tableofcontents, all inside one brace group
+ '{\\hypersetup{linkcolor=black}\\setcounter{tocdepth}{3}\\tableofcontents}'
+ );