summaryrefslogtreecommitdiff
path: root/pandoc-iri
blob: 9774532c999afb1fa651850299c89f3509f5dfe1 (plain)
  1. #!/usr/bin/perl
  2. # canonicalize URI links, beautify their text representations
  3. # set NETWORK_TESTS=1 to also validate and report failures to STDERR
  4. use warnings;
  5. use strict;
  6. use Pandoc::Filter;
  7. use Pandoc::Elements;
  8. use URI;
  9. use URI::Escape;
  10. use URI::Find;
  11. use HTTP::Tiny 0.014;
  # State shared by the subs below: $ua holds the HTTP::Tiny client once
  # normalize_uri has created it, and %links maps each probed URI to the
  # response of its HEAD request.
  my $ua;
  my %links;

  # Hand link_normalize to Pandoc::Filter as the filter action.
  pandoc_filter( \&link_normalize );
  # Filter action: rewrite a single document element.
  #
  # Argument: the element object passed in by the filter machinery.
  #
  # Returns:
  #   - for a Link: an array ref holding a new Link with the same content,
  #     a canonicalized URL (see pp_uri) and the original title;
  #   - for a Str: an array ref holding a new Str whose text has every URI
  #     rewritten by pp_string;
  #   - for anything else: an empty return.
  sub link_normalize {
      my $self = shift;
      my $name = $self->name;

      if ( $name eq 'Link' ) {
          my ( $url, $title ) = @{ $self->target };

          # Carry the title over; only the URL is meant to be normalized.
          # Blanking the title here would silently drop it from the output.
          $title = '' unless defined $title;

          return [ Link( $self->content, [ pp_uri($url), $title ] ) ];
      }

      if ( $name eq 'Str' ) {
          return [ Str( pp_string( $self->content ) ) ];
      }

      return;
  }
  # Canonicalize a URI and, when NETWORK_TESTS is set, probe it over HTTP.
  #
  # Argument: the URI as a string (or anything that stringifies to one).
  #
  # Returns: the canonical URI object.
  #
  # Side effects (only with a true $ENV{NETWORK_TESTS}): each distinct
  # http/https URI is requested with HEAD, the response is stored in %links
  # so the same URI is not requested twice, and an unsuccessful response is
  # reported on STDERR.  The return value is the same either way.
  sub normalize_uri {
      # NOTE(review): unescaping the whole string before parsing also decodes
      # reserved characters such as %2F or %3F, which can change what the URI
      # refers to -- confirm this is intended.
      my $uri = URI->new( uri_unescape(shift) )->canonical;

      return $uri unless $ENV{'NETWORK_TESTS'};

      # HTTP::Tiny speaks only http and https.  Probing mailto:, ftp: or a
      # relative reference can never succeed and would just produce a bogus
      # "Failed fetching" line, so skip the network check for those.
      my $scheme = $uri->scheme;
      return $uri unless defined $scheme && $scheme =~ /\Ahttps?\z/;

      # Already probed: reuse the cached response instead of asking again.
      return $uri if defined $links{$uri};

      # Create the client lazily so no HTTP::Tiny object exists unless needed.
      $ua = HTTP::Tiny->new() unless defined $ua;

      my $response = $links{$uri} = $ua->head($uri);
      printf STDERR "Failed fetching <%s>: %u %s\n",
          $uri, $response->{status}, $response->{reason}
          unless $response->{success};

      return $uri;
  }
  # Return the normalized form of the given URI as a plain URI string.
  sub pp_uri {
      my ($raw) = @_;
      my $normalized = normalize_uri($raw);
      return $normalized->as_string;
  }
  # Return the normalized form of the given URI rendered as an IRI string.
  # Only the first argument is used; any further arguments are ignored.
  sub pp_iri {
      my ($raw) = @_;
      my $normalized = normalize_uri($raw);
      return $normalized->as_iri;
  }
  # Rewrite every URI that URI::Find detects in a piece of text, using pp_iri
  # as the replacement callback, and return the resulting text.  The work is
  # done on a private copy of the argument.
  sub pp_string {
      my ($text) = @_;
      URI::Find->new( \&pp_iri )->find( \$text );
      return $text;
  }
  50. }