summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jonas Smedegaard <dr@jones.dk> 2014-12-30 17:30:07 +0100
committer: Jonas Smedegaard <dr@jones.dk> 2014-12-30 18:24:06 +0100
commit: fee2f3a48759f1581eb4a0c83815be9162ad5e4f (patch)
tree: 01b4e3bf0b3c0ec4f0a9f3135e20e7283bbaf235
parent: b8df713f513536e1d7404a61b19d264c0db6a4e5 (diff)
Optionally validate URLs, enabled in new flightcheck mode.
-rw-r--r--  Makefile    2
-rwxr-xr-x  pandoc-iri  23
2 files changed, 23 insertions, 2 deletions
diff --git a/Makefile b/Makefile
index 505254c..25079fe 100644
--- a/Makefile
+++ b/Makefile
@@ -1,3 +1,4 @@
+# expensive checks enabled: make -B flightcheck=1
# Final document: make -B final=1
stem = eut
@@ -17,6 +18,7 @@ localfilters = $(filter ./%,$(filters))
title = Ensuring utmost transparency
subtitle = Free Software and Open Standards under the Rules of Procedure of the European Parliament
env_filter += $(citeproc_file:%=PANDOC_CITEPROC_FILE=%)
+env_filter += $(if $(flightcheck),NETWORK_TESTS=1)
args_filter += $(filters:%=--filter %)
args_meta += $(citeproc_file:%=-M bibliography=%)
args_meta += -V lang=english -V langoption=variant=british
diff --git a/pandoc-iri b/pandoc-iri
index 0f3fb0c..9774532 100755
--- a/pandoc-iri
+++ b/pandoc-iri
@@ -1,5 +1,8 @@
#!/usr/bin/perl
+# canonicalize URI links, beautify their text representations
+# set NETWORK_TESTS=1 to also validate and report failures to STDERR
+
use warnings;
use strict;
@@ -9,6 +12,9 @@ use Pandoc::Elements;
use URI;
use URI::Escape;
use URI::Find;
+use HTTP::Tiny 0.014;
+
+my ($ua, %links);
pandoc_filter(
\&link_normalize,
@@ -26,12 +32,25 @@ sub link_normalize {
return;
}
+sub normalize_uri {
+ my $uri = URI->new(uri_unescape(shift))->canonical;
+ return $uri unless $ENV{'NETWORK_TESTS'};
+ return $uri if ( defined $links{$uri} );
+ $ua = HTTP::Tiny->new()
+ unless ( defined $ua );
+ $links{$uri} = $ua->head($uri);
+ printf STDERR "Failed fetching <%s>: %u %s\n",
+ $uri, $links{$uri}->{status}, $links{$uri}->{reason}
+ unless ($links{$uri}->{success});
+ return $uri;
+}
+
sub pp_uri {
- return URI->new(uri_unescape(shift))->canonical->as_string;
+ return normalize_uri(shift)->as_string;
}
sub pp_iri {
- return URI->new(uri_unescape(shift))->canonical->as_iri;
+ return normalize_uri(shift)->as_iri;
}
sub pp_string {