#!/usr/bin/perl
# Pandoc filter: canonicalize the URIs of links and beautify the textual
# representation of URIs found in plain text.
# Set NETWORK_TESTS=1 to also validate URIs and report failures to STDERR.

use warnings;
use strict;

use Pandoc::Filter 0.05;
use Pandoc::Elements 0.16;
use URI;
use URI::Escape;
use URI::Find;
use HTTP::Tiny 0.014;

# $ua    - HTTP::Tiny client, created lazily on the first network check.
# %links - HEAD responses already fetched, keyed by the canonical URI, so
#          each distinct URI is requested at most once per run.
my ($ua, %links);

pandoc_filter(
    \&link_normalize,
);

# Filter action applied to the document elements.
#
# Link elements are replaced by a new Link carrying the canonical form of
# the original target URI; Str elements are replaced by a Str in which
# every URI found in the text has been rewritten as an IRI.  For any other
# element nothing is returned.
sub link_normalize {
    my $self = shift;
    my $name = $self->name;

    if ( $name eq 'Link' ) {
        # NOTE(review): the rebuilt link gets empty attributes and an empty
        # title, so whatever the original link carried there is dropped --
        # confirm this is intended.
        return [
            Link(
                attributes {},
                $self->content,
                [ pp_uri( $self->target->[0] ) => '' ]
            )
        ];
    }

    if ( $name eq 'Str' ) {
        return [ Str pp_string( $self->content ) ];
    }

    return;
}

# Return the canonical URI object for the given URI (string or object).
#
# When the NETWORK_TESTS environment variable is true, http and https URIs
# are additionally checked with a HEAD request and a diagnostic is printed
# to STDERR if the request does not succeed.  The URI is returned in either
# case.
sub normalize_uri {
    # NOTE(review): unescaping before parsing also decodes reserved
    # characters (e.g. %2F, %26) -- confirm this is acceptable for the
    # URIs this filter is used on.
    my $uri = URI->new( uri_unescape(shift) )->canonical;

    return $uri unless $ENV{'NETWORK_TESTS'};

    # HTTP::Tiny only handles http and https.  Anything else (mailto:,
    # ftp:, relative references, ...) would always be reported as a
    # failure, so such URIs are not checked at all.
    my $scheme = $uri->scheme;
    return $uri unless defined $scheme && $scheme =~ /\Ahttps?\z/;

    # Already checked during this run.
    return $uri if defined $links{$uri};

    $ua = HTTP::Tiny->new() unless defined $ua;

    my $response = $links{$uri} = $ua->head($uri);
    printf STDERR "Failed fetching <%s>: %u %s\n",
        $uri, $response->{status}, $response->{reason}
        unless $response->{success};

    return $uri;
}

# Canonical form of the given URI as a plain (escaped) string.
sub pp_uri {
    return normalize_uri(shift)->as_string;
}

# Canonical form of the given URI as an IRI string.
sub pp_iri {
    return normalize_uri(shift)->as_iri;
}

# Return a copy of the given text in which every URI located by URI::Find
# has been replaced by the result of pp_iri.
sub pp_string {
    my $string = shift;

    my $finder = URI::Find->new( \&pp_iri );
    $finder->find( \$string );    # rewrites $string in place

    return $string;
}