summaryrefslogtreecommitdiff
path: root/pandoc-iri
blob: a93c37f52e2545078025684af4ad65449c4fd7ff (plain)
  1. #!/usr/bin/perl
  2. # canonicalize URI links and beautify their text representations
  3. # set NETWORK_TESTS=1 to also validate and report failures to STDERR
  4. use warnings;
  5. use strict;
  6. use Pandoc::Filter 0.05;
  7. use Pandoc::Elements 0.16;
  8. use URI;
  9. use URI::Escape;
  10. use URI::Find;
  11. use HTTP::Tiny 0.014;
  12. my ($ua, %links);
  13. pandoc_filter(
  14. \&link_normalize,
  15. );
  16. sub link_normalize {
  17. my $self = shift;
  18. return [ Link(
  19. attributes {},
  20. $self->content,
  21. [ pp_uri($self->target->[0]) => '' ]
  22. )]
  23. if ( $self->name eq 'Link' );
  24. return [ Str pp_string($self->content) ]
  25. if ( $self->name eq 'Str' );
  26. return;
  27. }
  28. sub normalize_uri {
  29. my $uri = URI->new(uri_unescape(shift))->canonical;
  30. return $uri unless $ENV{'NETWORK_TESTS'};
  31. return $uri if ( defined $links{$uri} );
  32. $ua = HTTP::Tiny->new()
  33. unless ( defined $ua );
  34. $links{$uri} = $ua->head($uri);
  35. printf STDERR "Failed fetching <%s>: %u %s\n",
  36. $uri, $links{$uri}->{status}, $links{$uri}->{reason}
  37. unless ($links{$uri}->{success});
  38. return $uri;
  39. }
  40. sub pp_uri {
  41. return normalize_uri(shift)->as_string;
  42. }
  43. sub pp_iri {
  44. return normalize_uri(shift)->as_iri;
  45. }
  46. sub pp_string {
  47. my $string = shift;
  48. my $finder = URI::Find->new( \&pp_iri );
  49. $finder->find(\$string);
  50. return $string;
  51. }