#!/usr/bin/perl

# TODO: support multi-match within word
# TODO: support multi-word match
# TODO: always count and allow expressing which (default: all)
# TODO: parse regex+comment as definition in external markdown TODO file.
# TODO: normal hyphenation within note (keep quote with word in "word")
# TODO: clear font styling in notes
# TODO: track headline levels and note skipping when "diving"

use warnings;
use strict;

use feature qw(switch);
no if $] >= 5.018, warnings => "experimental::smartmatch";

use Pandoc::Filter 0.05;
use Pandoc::Elements;

# ASCII punctuation (& ; / . ? ! =) that latex_encode() looks for when
# adding LaTeX discretionary breaks (\-).
my $breakable_chars = qr/[\&\;\/\.\?\!\=]/;
# Unicode hyphens and dashes: U+2010..U+2015 (hyphen, non-breaking hyphen,
# figure dash, en dash, em dash, horizontal bar), U+2053 (swung dash) and
# U+2212 (minus sign).  todo() flags them as 'fancy hyphen'.
my $hyphen_chars = qr/[\x{2010}-\x{2015}\x{2053}\x{2212}]/;
# Typographic double quotes: guillemets (U+00AB, U+00BB), curly and low-9
# double quotes (U+201C..U+201F), CJK corner brackets (U+300C..U+300F),
# double prime quotation marks (U+301D..U+301F), vertical presentation
# forms of the corner brackets (U+FE41..U+FE44), fullwidth quotation mark
# (U+FF02) and halfwidth corner brackets (U+FF62, U+FF63).
# todo() flags them as 'fancy quote'.
my $doublequote_chars = qr/[\x{00AB}\x{00BB}\x{201C}-\x{201F}\x{300C}-\x{300F}\x{301D}-\x{301F}\x{FE41}-\x{FE44}\x{FF02}\x{FF62}\x{FF63}]/;
# Typographic single quotes: grave accent (U+0060), curly and low-9 single
# quotes (U+2018..U+201B), single angle quotation marks (U+2039, U+203A)
# and fullwidth apostrophe (U+FF07).  todo() flags them as 'fancy quote'.
my $singlequote_chars = qr/[\x{0060}\x{2018}\x{2019}\x{201A}\x{201B}\x{2039}\x{203A}\x{FF07}]/;
# Right single quotation mark (U+2019); latex_encode() replaces it with a
# plain ASCII apostrophe.
my $odd_singlequote_chars = qr/[\x{2019}]/;

# Number of Str elements todo() has visited since the most recent block
# element; todo() resets it to 0 whenever it is handed a block.
my $inline_count;

# Hand todo() to Pandoc::Filter's pandoc_filter() as the filter action.
pandoc_filter(
	\&todo,
);

# Prepare a piece of plain document text for embedding in raw LaTeX.
#
# Takes one string and returns a converted copy; the argument itself is
# not modified.  Three things happen:
#
#   * Characters that LaTeX treats as syntax (\ { } % $ # _ & ~ ^) are
#     replaced by their printable equivalents.  mark_inside() emits the
#     result as a raw LaTeX inline, so nothing else escapes it.
#   * A discretionary break (\-) is added after every character of
#     $breakable_chars that is neither preceded by whitespace or another
#     breakable character nor followed by whitespace.
#   * Characters of $odd_singlequote_chars become an ASCII apostrophe.
#
# Escaping and break insertion share a single substitution pass so that
# the look-around conditions of the break rule are evaluated against the
# original text rather than against already-inserted backslashes.
sub latex_encode {
	my $s = shift;

	my %escape_for = (
		'\\' => '\\textbackslash{}',
		'{'  => '\\{',
		'}'  => '\\}',
		'%'  => '\\%',
		'$'  => '\\$',
		'#'  => '\\#',
		'_'  => '\\_',
		'&'  => '\\&',
		'~'  => '\\textasciitilde{}',
		'^'  => '\\textasciicircum{}',
	);
	my $special = qr/[\\{}\$%&#_~^]/;

	# First alternative: a breakable character at a permitted position
	# (escaped as well if it is reserved, e.g. "&").  Second alternative:
	# any other reserved character, which is only escaped.
	$s =~ s{
		( (?<! \s | $breakable_chars ) $breakable_chars (?! \s ) )
		| ( $special )
	}{
		defined $1
			? ( exists $escape_for{$1} ? $escape_for{$1} : $1 ) . '\\-'
			: $escape_for{$2}
	}gex;

	$s =~ s/$odd_singlequote_chars/'/g; #'
	return $s;
}

# Build the LaTeX annotation for one flagged fragment.
#
# Positional arguments:
#   $before - text in front of the flagged fragment
#   $it     - the flagged fragment itself
#   $after  - text following the flagged fragment
#   $note   - remark attached to the fragment
#
# Each argument is passed through latex_encode().  The result is an array
# reference containing a single RawInline element of format 'latex' with
# the content  <before>\mytodo{<note>}{<it>}<after>.
sub mark_inside {
	my ( $before, $it, $after, $note ) = @_;

	# Encode in the order the pieces appear in the output template.
	my ( $lead, $remark, $target, $trail )
		= map { latex_encode($_) } $before, $note, $it, $after;

	my $latex = sprintf( '%s\\mytodo{%s}{%s}%s',
		$lead, $remark, $target, $trail );

	return [ RawInline( 'latex', $latex ) ];
}

# Filter action handed to pandoc_filter(): annotate suspicious-looking words.
#
# Receives one document element as its first argument.  A block element
# resets the per-block Str counter ($inline_count); any element that is
# not a Str yields nothing.  A Str is counted and then checked against
# the rules below, in this order, the first match winning:
#
#   "-"                         lone ASCII hyphen            'maybe break'
#   "---x"                      three or more leading "-"    'maybe break'
#   "a--b"                      two or more "-" inside       'maybe range'
#   any of $hyphen_chars        first through last of them   'fancy hyphen'
#   any of $doublequote_chars   first through last of them   'fancy quote'
#   any of $singlequote_chars   first through last of them   'fancy quote'
#   "(1)" "1)" "1."             first Str of a block only    'maybe list'
#   "(iv)" "iv)" "1.2" "1.2.3"  first Str of a block only    'maybe fancy list'
#
# Returns the array reference produced by mark_inside() for a match and
# nothing (bare return) otherwise.
#
# Written as plain if-tests rather than given/when: the switch feature and
# smartmatching are deprecated as of Perl 5.38.  Captures are copied into
# lexicals right away instead of passing $1..$3 on to another sub.
sub todo {
	my $self = shift;

	$inline_count = 0 if ( $self->is_block );
	return unless ( $self->name eq 'Str' );

	my $text = $self->content;
	$inline_count++;

	# ASCII hyphen runs.
	if ( my ($dash) = $text =~ /^(-)$/ ) {
		return mark_inside( '', $dash, '', 'maybe break' );
	}
	if ( my ( $dashes, $rest ) = $text =~ /^(---+)(.+?)$/ ) {
		return mark_inside( '', $dashes, $rest, 'maybe break' );
	}
	if ( my ( $before, $dashes, $after ) = $text =~ /^(.+?)(--+)(.+?)$/ ) {
		return mark_inside( $before, $dashes, $after, 'maybe range' );
	}

	# Unicode hyphens and quotes: the flagged span runs from the first to
	# the last character of the class found in the word.
	for my $rule (
		[ $hyphen_chars,      'fancy hyphen' ],
		[ $doublequote_chars, 'fancy quote' ],
		[ $singlequote_chars, 'fancy quote' ],
	) {
		my ( $class, $note ) = @{$rule};
		if ( my ( $before, $span, $after )
			= $text =~ /^(.*?)($class(?:.*$class)?)(.*?)$/ )
		{
			return mark_inside( $before, $span, $after, $note );
		}
	}

	# List-marker look-alikes only matter for the first Str of a block.
	return unless $inline_count == 1;

	if ( my ($marker) = $text =~ /^(\(?\d+\)|\d+\.)$/ ) {
		return mark_inside( '', $marker, '', 'maybe list' );
	}
	if ( my ($marker) = $text =~ /^(\(?[ivxc]+\)|\d+\.[\d.]+)$/ ) {
		return mark_inside( '', $marker, '', 'maybe fancy list' );
	}

	return;
}