diff options
author | Jonas Smedegaard <dr@jones.dk> | 2014-12-30 17:30:07 +0100 |
---|---|---|
committer | Jonas Smedegaard <dr@jones.dk> | 2014-12-30 18:24:06 +0100 |
commit | fee2f3a48759f1581eb4a0c83815be9162ad5e4f (patch) | |
tree | 01b4e3bf0b3c0ec4f0a9f3135e20e7283bbaf235 | |
parent | b8df713f513536e1d7404a61b19d264c0db6a4e5 (diff) |
Optionally validate URLs, enabled in new flightcheck mode.
-rw-r--r-- | Makefile | 2 | ||||
-rwxr-xr-x | pandoc-iri | 23 |
2 files changed, 23 insertions, 2 deletions
@@ -1,3 +1,4 @@ +# expensive checks enabled: make -B flightcheck=1 # Final document: make -B final=1 stem = eut @@ -17,6 +18,7 @@ localfilters = $(filter ./%,$(filters)) title = Ensuring utmost transparency subtitle = Free Software and Open Standards under the Rules of Procedure of the European Parliament env_filter += $(citeproc_file:%=PANDOC_CITEPROC_FILE=%) +env_filter += $(if $(flightcheck),NETWORK_TESTS=1) args_filter += $(filters:%=--filter %) args_meta += $(citeproc_file:%=-M bibliography=%) args_meta += -V lang=english -V langoption=variant=british @@ -1,5 +1,8 @@ #!/usr/bin/perl +# canonicalize URI links, beautify their text representations +# set NETWORK_TESTS=1 to also validate and report failures to STDERR + use warnings; use strict; @@ -9,6 +12,9 @@ use Pandoc::Elements; use URI; use URI::Escape; use URI::Find; +use HTTP::Tiny 0.014; + +my ($ua, %links); pandoc_filter( \&link_normalize, @@ -26,12 +32,25 @@ sub link_normalize { return; } +sub normalize_uri { + my $uri = URI->new(uri_unescape(shift))->canonical; + return $uri unless $ENV{'NETWORK_TESTS'}; + return $uri if ( defined $links{$uri} ); + $ua = HTTP::Tiny->new() + unless ( defined $ua ); + $links{$uri} = $ua->head($uri); + printf STDERR "Failed fetching <%s>: %u %s\n", + $uri, $links{$uri}->{status}, $links{$uri}->{reason} + unless ($links{$uri}->{success}); + return $uri; +} + sub pp_uri { - return URI->new(uri_unescape(shift))->canonical->as_string; + return normalize_uri(shift)->as_string; } sub pp_iri { - return URI->new(uri_unescape(shift))->canonical->as_iri; + return normalize_uri(shift)->as_iri; } sub pp_string { |