summaryrefslogtreecommitdiff
path: root/mediawiki-extract
diff options
context:
space:
mode:
authorJonas Smedegaard <dr@jones.dk>2014-12-30 01:50:08 +0100
committerJonas Smedegaard <dr@jones.dk>2014-12-30 01:50:08 +0100
commit6f98b4dfb2b80e3eb3072dd3f140351581746db2 (patch)
tree3504d7481f617648637af7e3209bb32f45e605c0 /mediawiki-extract
parent5b493a9bf8356bce9472256a3a2a16c3b7ab2643 (diff)
Move mediawiki tweaks to separate scripts.
Diffstat (limited to 'mediawiki-extract')
-rwxr-xr-xmediawiki-extract22
1 files changed, 22 insertions, 0 deletions
diff --git a/mediawiki-extract b/mediawiki-extract
new file mode 100755
index 0000000..c2dae68
--- /dev/null
+++ b/mediawiki-extract
@@ -0,0 +1,22 @@
+#!/usr/bin/perl
+
+# Extract and decode MediaWiki content from HTML source view.
+# Usage: mediawiki-extract INFILE [OUTFILE]   (OUTFILE defaults to INFILE)
+
+use warnings;
+use strict;
+
+use Path::Tiny;
+use HTML::Entities;
+
+my $infile = shift;
+defined $infile or die "Usage: $0 INFILE [OUTFILE]\n";
+my $outfile = shift || $infile;
+
+my $content = path($infile)->slurp_utf8;
+# Keep only the body of the last <textarea>.  Refuse to continue when there
+# is none: otherwise the whole page would be decoded and written to OUTFILE.
+$content =~ s!.*<textarea[^>]*>!!s or die "$infile: no <textarea> found\n";
+$content =~ s!</textarea.*!!s;    # drop the closing tag and everything after
+decode_entities($content);        # void context: decodes $content in place
+path($outfile)->spew_utf8($content);