blob: a93c37f52e2545078025684af4ad65449c4fd7ff (
plain)
- #!/usr/bin/perl
- # canonicalize URI links, beautify their text representations
- # set NETWORK_TESTS=1 to also validate and report failures to STDERR
- use warnings;
- use strict;
- use Pandoc::Filter 0.05;
- use Pandoc::Elements 0.16;
- use URI;
- use URI::Escape;
- use URI::Find;
- use HTTP::Tiny 0.014;
# State used only by the optional network check in normalize_uri():
# the HTTP client (created on first use) and the HEAD responses seen so far,
# keyed by URI.
my $ua;
my %links;

# Register link_normalize as the filter action.
pandoc_filter( \&link_normalize );
# Filter action: canonicalize link targets and prettify URIs in plain text.
#
# Takes the element being visited.
#   * Link: returns a one-element array reference holding a rebuilt Link with
#     the same content and title, whose URL has gone through pp_uri().
#     Attributes are reset to the empty set.
#   * Str:  returns a one-element array reference holding a Str whose text
#     has gone through pp_string().
#   * anything else: returns nothing (presumably the filter then keeps the
#     element as it is -- confirm against Pandoc::Filter).
sub link_normalize {
    my $self = shift;
    my $name = $self->name;

    if ( $name eq 'Link' ) {
        my ( $url, $title ) = @{ $self->target };

        # Only the URL is meant to change, so carry the existing title over
        # instead of replacing it with an empty string.
        $title = '' unless defined $title;

        return [ Link(
            attributes {},
            $self->content,
            [ pp_uri($url), $title ]
        )];
    }

    return [ Str pp_string( $self->content ) ]
        if $name eq 'Str';

    return;
}
# Canonicalize a URI and, when NETWORK_TESTS is set, probe it once.
#
# Takes a URI as a string (or anything that stringifies to one) and returns
# its canonical form as a URI object.
#
# When the NETWORK_TESTS environment variable is true and the URI is http or
# https, a HEAD request is sent the first time the URI is seen; the response
# is remembered in %links so each URI is probed at most once. A failed probe
# is reported on STDERR; it never changes the return value.
sub normalize_uri {
    my ($raw) = @_;

    # Do not unescape the whole string before parsing: decoding reserved
    # characters (%26, %2F, %23, %25, ...) changes what the URI means or makes
    # it invalid. canonical() already decodes the escapes that are safe.
    my $uri = URI->new($raw)->canonical;

    return $uri unless $ENV{'NETWORK_TESTS'};

    # HTTP::Tiny only handles http and https; probing mailto:, ftp: or
    # relative references would just report a spurious failure.
    my $scheme = $uri->scheme;
    return $uri
        unless defined $scheme && $scheme =~ m{\A https? \z}xms;

    my $key = $uri->as_string;
    return $uri if defined $links{$key};

    $ua = HTTP::Tiny->new()
        unless defined $ua;

    my $response = $links{$key} = $ua->head($key);
    printf STDERR "Failed fetching <%s>: %u %s\n",
        $key, $response->{status}, $response->{reason}
        unless $response->{success};

    return $uri;
}
# Return the normalized form of the given URI via the URI object's
# as_string method.
sub pp_uri {
    my $uri = normalize_uri(shift);
    return $uri->as_string;
}
# Return the normalized form of the given URI via the URI object's as_iri
# method. Only the first argument is used; pp_string() passes this sub to
# URI::Find as its callback.
sub pp_iri {
    my $uri = normalize_uri(shift);
    return $uri->as_iri;
}
# Run URI::Find over a copy of the given text with pp_iri() as the callback,
# and return the resulting text.
sub pp_string {
    my ($text) = @_;

    # find() edits the string it is given a reference to, so work on the copy.
    URI::Find->new( \&pp_iri )->find( \$text );

    return $text;
}
|