diff options
author | Jonas Smedegaard <dr@jones.dk> | 2014-12-30 01:50:08 +0100 |
---|---|---|
committer | Jonas Smedegaard <dr@jones.dk> | 2014-12-30 01:50:08 +0100 |
commit | 6f98b4dfb2b80e3eb3072dd3f140351581746db2 (patch) | |
tree | 3504d7481f617648637af7e3209bb32f45e605c0 /mediawiki-extract | |
parent | 5b493a9bf8356bce9472256a3a2a16c3b7ab2643 (diff) |
Move mediawiki tweaks to separate scripts.
Diffstat (limited to 'mediawiki-extract')
-rwxr-xr-x | mediawiki-extract | 22 |
1 files changed, 22 insertions, 0 deletions
#!/usr/bin/perl

# Extract and decode MediaWiki content from an HTML source view.
#
# Usage: mediawiki-extract INFILE [OUTFILE]
#
# Reads INFILE as UTF-8, keeps only the text inside the last <textarea>
# element, decodes the HTML entities in it, and writes the result as UTF-8
# to OUTFILE.  When OUTFILE is omitted, INFILE is overwritten in place.

use warnings;
use strict;

use Path::Tiny;
use HTML::Entities;

my $infile = shift;
die "Usage: $0 INFILE [OUTFILE]\n"
    unless defined $infile && length $infile;

# Fall back to in-place editing only when no output name was given; a
# plain truth test would also discard a file literally named "0".
my $outfile = shift;
$outfile = $infile
    unless defined $outfile && length $outfile;

my $content = path($infile)->slurp_utf8;

# Greedy match with /s: drop everything up to and including the opening
# tag of the last <textarea> in the document.  Refuse to continue when no
# textarea exists, since otherwise the whole page would be entity-decoded
# and written out, clobbering INFILE in the default in-place mode.
$content =~ s!.*<textarea[^>]*>!!s
    or die "$infile: no <textarea> element found\n";

# Drop the closing tag and everything after it.  A missing closing tag is
# only reported: the remaining text is still kept, as before.
$content =~ s!</textarea.*!!s
    or warn "$infile: <textarea> is not closed; keeping remaining text\n";

# Called in void context, decode_entities() modifies its argument in place.
decode_entities($content);

path($outfile)->spew_utf8($content);

1;