# summary | refs | log | tree | commit | diff
# path: root/mediawiki-uri-escape
# blob: 40dd10c44cfbd1722c1fe3bc90c581eac2b229c4 (plain)
  #!/usr/bin/perl
  # Percent-escape the path part of every http(s) URI found in a text file,
  # to work around modest URL parsing
  # <https://github.com/jgm/pandoc/issues/1838>.
  #
  # Usage: mediawiki-uri-escape INFILE [OUTFILE]
  #
  # INFILE is read as UTF-8; the result is written as UTF-8 to OUTFILE, or
  # back to INFILE when OUTFILE is omitted or empty.
  use warnings;
  use strict;
  use Path::Tiny;
  use Regexp::Common qw(net);
  use URI::Escape;

  # Scheme, host and optional port must be present, but \K keeps them out of
  # the replaced text, so only the path (capture 1) is rewritten.  The path
  # is matched lazily so that it stops at the first terminator in $uri_end.
  my $uri_path = qr!https?://$RE{net}{domain}{-nospace}(?::\d+)?\K(/\S*?)!;

  # A URI ends before optional trailing punctuation (. , ;) that is followed
  # by whitespace, one of < " | }, or the end of the text.  The \z branch
  # makes sure a URI that ends the input without a newline is not skipped.
  my $uri_end = qr!(?=[.,;]?(?:[\s<\"\|}]|\z))!;

  # Return $path percent-encoded, with only ASCII letters, digits, "/" and
  # "." left unescaped.
  #
  # Existing %XX escapes are decoded first so they are not escaped twice.
  # NOTE(review): "?", "=", "&" and "#" are outside the safe set, so a query
  # string or fragment following the path is escaped as well - confirm this
  # is intended.
  sub _escape_path {
      my ($path) = @_;

      # The input is decoded characters, but percent-encoding works on
      # bytes: uri_escape() croaks on characters above 255 and would emit
      # Latin-1 escapes for 128..255.  Encoding to UTF-8 first makes raw
      # non-ASCII characters and pre-existing %XX escapes come out alike.
      utf8::encode($path);

      return uri_escape( uri_unescape($path), '^A-Za-z0-9/.' );
  }

  # Return a copy of $text in which the path of every matched http(s) URI
  # has been re-escaped by _escape_path().  Text outside URI paths is left
  # untouched.
  sub escape_uri_paths {
      my ($text) = @_;
      $text =~ s{\b$uri_path$uri_end}{ _escape_path($1) }ge;
      return $text;
  }

  # Read INFILE, rewrite its URI paths and write the result to OUTFILE.
  # Dies with a usage message when INFILE is missing; Path::Tiny raises its
  # own errors when a file cannot be read or written.
  sub main {
      my ( $infile, $outfile ) = @_;

      die "usage: $0 INFILE [OUTFILE]\n"
          unless defined $infile && length $infile;

      # Overwrite INFILE in place unless a non-empty OUTFILE was given
      # (a file literally named "0" is a valid OUTFILE).
      $outfile = $infile
          unless defined $outfile && length $outfile;

      my $text = path($infile)->slurp_utf8;
      path($outfile)->spew_utf8( escape_uri_paths($text) );
      return;
  }

  # Run only when executed as a script, so the subs above can be loaded and
  # called directly (e.g. from a test) without touching any files.
  main(@ARGV) unless caller;
  1;