From 27d338dec0428bc22e2838eb8641c6e0d1681e22 Mon Sep 17 00:00:00 2001
From: Jonas Smedegaard
Date: Fri, 10 May 2013 20:46:04 +0200
Subject: Include mk* scripts
---
mkhtm2html-default | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 63 insertions(+)
create mode 100755 mkhtm2html-default
(limited to 'mkhtm2html-default')
diff --git a/mkhtm2html-default b/mkhtm2html-default
new file mode 100755
index 0000000..c2589a2
--- /dev/null
+++ b/mkhtm2html-default
@@ -0,0 +1,63 @@
+#!/usr/bin/perl
+
+# normalize HTML
+
+use Getopt::Long;
+use File::Slurp;
+
+use strict;
+use warnings;
+
+# Command-line flag: --force / -f.  Getopt::Long only fills a lexical
+# variable when it is handed a reference to it, so bind the option to
+# $force explicitly (without a destination the flag never reaches $force).
+my $force;
+GetOptions( 'force|f' => \$force );
+
+# The first remaining argument is the file stem; the input is "<stem>.htm".
+# Fail with a usage message instead of silently trying to read ".htm".
+my $stem = shift;
+defined $stem or die "Usage: $0 [--force] STEM\n";
+
+# Slurp the whole document into the topic variable so that the bare s///
+# statements that follow operate on it implicitly.  The lexical form
+# "my $_" was experimental and has been removed from Perl (5.24), so the
+# global topic variable is used here.
+local $_ = read_file( $stem . '.htm' );
+
+# whitespace
+s/ / /mg;
+s{]*>\s*
\s*}{}mg;
+
+# page header
+s{]*>\s*\K(?:
]*;top:1\d{3}px;[^>]*>(?:<[bi]>)?[^<]+(?:[bi]>)?
\s*)+}{}mg;
+
+# footnote
+s{
]*>\h+
\s*(?:
]*>\S+
\s*
]*>((?:(?!
\s*)+(?=
)}{}mg;
+
+foreach my $class ( map /(?<=\.)(ft\d+)(?={font-size:7px;)/mg, $_ ) {
+ s{\(
\s*]*\sclass="$class">\d+
\s*]*>\)}{}mg;
+};
+
+# document headers
+s{
.*?\s*}{}msg;
+s{\n