#!/usr/bin/perl

# canonicalize URI links, beautify their text representations
# set NETWORK_TESTS=1 to also validate and report failures to STDERR

use warnings;
use strict;

use Pandoc::Filter 0.05;
use Pandoc::Elements 0.16;

use URI;
use URI::Escape;
use URI::Find;
use HTTP::Tiny 0.014;

# Shared state used by normalize_uri when NETWORK_TESTS is enabled.
my $ua;       # HTTP::Tiny client, created on first use
my %links;    # HEAD responses, keyed by stringified canonical URI

# Install link_normalize as the filter action.
pandoc_filter( \&link_normalize );

# Filter action: normalize Link targets and URIs embedded in plain text.
#
# Takes the element being visited as its only argument.
#
# - Link: returns a replacement Link whose target URI has been passed
#   through pp_uri.  The link text and the link title are kept; the
#   attributes are reset to empty.
# - Str:  returns a replacement Str whose content has been passed
#   through pp_string.
# - anything else: returns nothing.
sub link_normalize {
	my $self = shift;
	my $name = $self->name;

	if ( $name eq 'Link' ) {
		my ($url, $title) = @{ $self->target };
		return [ Link(
			attributes {},
			$self->content,
			# carry the existing title over instead of blanking it
			[ pp_uri($url) => ( defined($title) ? $title : '' ) ]
		)];
	}
	return [ Str pp_string($self->content) ]
		if ( $name eq 'Str' );
	return;
}

# Canonicalize a URI and, if requested, check that it is reachable.
#
# The argument (a string or a URI object) is percent-decoded, parsed
# with URI->new and canonicalized.  The resulting URI object is always
# returned.
#
# When the NETWORK_TESTS environment variable is true, http and https
# URIs are additionally probed with a HEAD request.  Each distinct URI
# is probed at most once per run (responses are cached in %links), and
# unsuccessful responses are reported on STDERR.
sub normalize_uri {
	my $uri = URI->new(uri_unescape(shift))->canonical;
	return $uri unless $ENV{'NETWORK_TESTS'};

	# HTTP::Tiny only handles http and https.  Relative references
	# (no scheme) and schemes such as mailto: or ftp: cannot be probed
	# and would otherwise be reported as bogus failures.
	my $scheme = $uri->scheme;
	return $uri
		unless ( defined $scheme and $scheme =~ m{\Ahttps?\z} );

	return $uri if ( defined $links{$uri} );
	$ua = HTTP::Tiny->new()
		unless ( defined $ua );
	$links{$uri} = $ua->head($uri);
	printf STDERR "Failed fetching <%s>: %u %s\n",
		$uri, $links{$uri}->{status}, $links{$uri}->{reason}
		unless ($links{$uri}->{success});
	return $uri;
}

# Normalize the given URI and return it via the URI object's
# as_string method.
sub pp_uri {
	my $raw = shift;
	my $normalized = normalize_uri($raw);
	return $normalized->as_string;
}

# Normalize the given URI and return it via the URI object's as_iri
# method.  Used as the URI::Find callback in pp_string, so only the
# first argument is consulted.
sub pp_iri {
	my $raw = shift;
	my $normalized = normalize_uri($raw);
	return $normalized->as_iri;
}

# Run URI::Find over a copy of the given text with pp_iri as the
# callback, and return the resulting text.  The caller's value is not
# modified.
sub pp_string {
	my ($text) = @_;
	URI::Find->new( \&pp_iri )->find( \$text );
	return $text;
}