From 27d338dec0428bc22e2838eb8641c6e0d1681e22 Mon Sep 17 00:00:00 2001
From: Jonas Smedegaard
Date: Fri, 10 May 2013 20:46:04 +0200
Subject: Include mk* scripts
---
mkhtm2html-2 | 39 +++++++++++++++++++++++++++++++++++++++
1 file changed, 39 insertions(+)
create mode 100755 mkhtm2html-2
(limited to 'mkhtm2html-2')
diff --git a/mkhtm2html-2 b/mkhtm2html-2
new file mode 100755
index 0000000..ae18391
--- /dev/null
+++ b/mkhtm2html-2
@@ -0,0 +1,39 @@
+#!/usr/bin/perl
+
+# normalize HTML
+
+use Getopt::Long;
+use File::Slurp;
+
+use strict;
+use warnings;
+
+my $force;    # --force/-f flag; parsed, but nothing in the visible script consults it
+GetOptions( 'force|f' => \$force ) or die "Usage: $0 [--force|-f] STEM\n";
+my $stem = shift @ARGV;    # input is STEM.htm
+defined $stem or die "Usage: $0 [--force|-f] STEM\n";
+$_ = read_file( $stem . '.htm' );    # global $_ so the bare s/// below act on it; "my $_" does not compile on Perl 5.24+
+
+# whitespace: collapse every run of spaces / horizontal whitespace (\h) into a single space
+s/(?: |\h)+/ /mg;
+
+# preamble: delete everything from the start of the text through the last ">HAVE ADOPTED THIS REGULATION:" marker and the whitespace after it (greedy .* under /s). NOTE(review): pattern looks truncated in this copy (markup apparently stripped) - confirm against upstream.
+s{.*>HAVE ADOPTED THIS REGULATION:
\s*}{}s;
+
+# page header: delete (empty replacement) matches keyed on a tag attribute containing ";top:1172px;" or ";top:1187px;". NOTE(review): pattern looks truncated in this copy (markup apparently stripped) - confirm against upstream.
+s{]*;top:(?:1172|1187)px;[^>]*>(?:(?!
\s*}{}mg;
+
+# headline: rewrite SECTION / Article / paragraph headings, keeping the captured identifiers and dropping the "is inserted:", "is replaced by the following:" and "is amended as follows:" lead-ins. NOTE(review): patterns and replacement markup look truncated in this copy (tag text apparently stripped) - confirm against upstream.
+s{]*>(?:In Title \S+, the following Section \S+ is inserted:
\s*]*>)?\'?(SECTION \S+)
((?:(?!
}{$1
\n$2
}mg;
+s{]*>\'?(Article \S+)
}{$1
}mg;
+s{]*>(?:Article \S+ is replaced by the following:
\s*]*>)?\'?(Article \S+)
((?:(?!
}{$1
\n$2
}mg;
+s{]*>(Article \S+) is amended as follows:
}{$1
}mg;
+s{]*>(?:paragraph \S+ is replaced by the following:
\s*)(]*>)\'?(\d+)\. }{
$2
\n$1}mg;
+s{]*>In (Article \S+), paragraph (\S+) is replaced by the following:
\s*(]*>)\'?(\2)\. }{
$1
\n$2
\n$3}mg;
+
+# unwrap: as shown here, replace each line break plus the whitespace around it with a single space. NOTE(review): a markup token may be missing from the pattern in this copy - confirm against upstream.
+s{\s*
\s*}{ }mg;
+
+my $html_path = $stem . '.html';    # output path: the stem with an .html extension
+write_file( $html_path, $_ );    # save the transformed text held in $_
+print "DONE: $0 stem $stem\n";    # completion notice on standard output
--
cgit v1.2.3