- #!/usr/bin/perl
- # emphasis + doublequote means technical or foreign term.
- # doublequote + emphasis means title.
- #
- # Pandoc parses quotation marks in two ways (possibly concurrently):
- # * Leave as-is when parsing and serializing
- # * smart-converts to Quoted object and serializes to ``'' or `'
- # Some quotation marks may still be left as-is in smart mode, depending
- # on parser. See e.g. https://github.com/jgm/pandoc/issues/2012
- #
- # This filter handles Quoted objects and (less reliably) Char quotes.
- use Moops -strict;
- use Pandoc::Filter 0.06;
- use Pandoc::Elements 0.12;
- use Scalar::Util qw(reftype);
- use Clone qw(clone);
- use Log::Any qw($log);
- use Log::Any::Adapter ( 'Screen',
- use_color => ! -t *STDERR );
- # TODO: add class Egg, and coercing roles Exotic and Noble
- class Uterus with MooX::Role::Logger {
- lexical_has fetuses => (
- isa => ArrayRef[Object],
- default => sub { [] },
- reader => \( my $fetuses ),
- lazy => 1,
- );
- has is_monstrous => (
- is => 'rw',
- isa => Bool,
- default => sub { 0 },
- );
- # TODO: replace with method fertilize, with Egg coercions
- method implant ( Object $clone ) {
- push @{ $self->$fetuses }, $clone;
- }
- method create ( Object $monster ) {
- push @{ $self->$fetuses }, $monster;
- $self->is_monstrous(1);
- }
- method abort () {
- splice @{ $self->$fetuses };
- return;
- }
- method deliver () {
- my @babies = splice @{ $self->$fetuses };
- if ($self->_logger->is_debug()) {
- # $self->_logger->debugf( 'giving birth: %s',
- # \@babies );
- for my $baby ( @babies ) {
- $self->_logger->debugf( 'giving birth: %s',
- $baby->name );
- };
- };
- return [ @babies ];
- }
- }
- my $doublequote = qr/[\"\x{00AB}\x{00BB}\x{201C}-\x{201F}\x{300C}-\x{300F}\x{301D}-\x{301F}\x{FE41}-\x{FE44}\x{FF02}\x{FF62}\x{FF63}]/;
- my $nodoublequote = qr/[^\"\x{00AB}\x{00BB}\x{201C}-\x{201F}\x{300C}-\x{300F}\x{301D}-\x{301F}\x{FE41}-\x{FE44}\x{FF02}\x{FF62}\x{FF63}]/;
- my $womb = Uterus->new;
- pandoc_filter(
- \&emphasis,
- );
- # FIXME: wrongly messes with "'''open'''": avoid strong emphasis
- # FIXME: wrongly adds quotation mark to "'''''if''''' ...": maybe avoid emphasis not immediately followed by plain text?
- # TODO: flatten only Str and Space as content of new children, else skip
- sub emphasis {
- my $self = shift;
- # TODO: replace @children with $kids ref, and use at more places
- my (@children, $last_child, $kid);
- # we do our own walk of inline objects
- return unless $self->is_block;
- # skip known irrelevant block types
- return if ( $self->name =~
- /CodeBlock|RawBlock|Header|HorizontalRule|Null/ );
- # skip known irrelevant block types
- if ( $self->name =~
- /OrderedList|BulletList|DefinitionList|Table/
- ) {
- $log->infof( 'emphasis: skipping unsupported block: %s',
- $self->name );
- return
- };
- # align coherently and walk direct descendants
- if ( reftype $self->content eq 'ARRAY') {
- @children = @{ $self->content };
- $last_child = scalar $self->content->[
- ( keys @{ $self->content } ) -1 ];
- } elsif ( reftype $self->content eq 'HASH') {
- @children = [ $self->content ];
- $last_child = scalar $self->content;
- } else {
- $log->warnf( 'emphasis: skipping oddly structured block: %s',
- $self->name );
- return;
- }
- $kid = 0;
- $log->debugf( 'emphasis: processing block: %s',
- $self->name );
- while ($kid < keys @children ) {
- # title markup via Quoted
- # TODO: limit container to type DoubleQuote
- if ( $self->content->[$kid]->name eq 'Quoted'
- and $self->content->[$kid]->content->[0]->name eq 'Emph'
- ) {
- my $new_content = $self->content->[$kid]->content->[0]->string;
- $log->infof( 'emphasis: Quoted+Emph→title: %s',
- $new_content );
- # FIXME: LaTeX-encode content
- $womb->create(
- Quoted( DoubleQuote,
- $new_content ));
- $kid++;
- next;
- }
- # title markup via quotation mark in Str
- if ( $self->content->[$kid]->name eq 'Str'
- and $self->content->[$kid]->content =~ /$doublequote/
- and $self->content->[$kid+1]->name eq 'Emph'
- and $self->content->[$kid+2]->name eq 'Str'
- and $self->content->[$kid+2]->content =~ /$doublequote/
- ) {
- $log->infof( 'emphasis: %s+Emph→title: %s',
- $self->content->[$kid]->content,
- $self->content->[$kid+1]->content );
- $womb->create(
- Quoted( DoubleQuote,
- $self->content->[$kid+1]->content ));
- $kid += 3;
- next;
- };
- # foreign markup via Quoted
- # TODO: limit container to type DoubleQuote
- if ( $self->content->[$kid]->name eq 'Emph'
- and $self->content->[$kid]->content->[0]->name eq 'Quoted'
- ) {
- my $new_content = $self->content->[$kid]->content->[0]->string;
- $log->infof( 'emphasis: Emph+Quoted→foreign: %s',
- $new_content );
- # FIXME: LaTeX-encode content
- $womb->create(
- RawInline( 'latex',
- sprintf( '\foreign{%s}',
- $new_content )));
- $kid++;
- next;
- }
- # foreign markup via quotation mark in Str
- if ( $self->content->[$kid]->name eq 'Emph'
- and $self->content->[$kid]->string
- # =~ /^(\")([^\"]+)\"$/
- =~ /^($doublequote)(^$nodoublequote+)$doublequote$/
- ) {
- $log->infof( 'emphasis: Emph+%s→foreign: %s',
- $1, $2 );
- # FIXME: LaTeX-encode content
- $womb->create(
- RawInline( 'latex',
- sprintf( '\foreign{%s}',
- $2 )));
- $kid++;
- next;
- }
- # clone healthy kid, in case of monstrous siblings
- $womb->implant( $self->content->[$kid] );
- $kid++;
- }
- # cheaper to abort (i.e. return nothing) if no monsters created
- return $womb->abort unless ($womb->is_monstrous);
- my $kids = $womb->deliver;
- $log->debugf( 'delivered kids: %s', $kids );
- # dirty hack to preserve non-content block parts
- my $new_block = clone $self;
- $$new_block{c} = $kids;
- $log->debugf( 'block: %s', $new_block );
- return [ $new_block ];
- }
|