#!/usr/bin/perl

# normalize HTML
#
# Usage: mkhtm2html-2 [--force|-f] STEM
#
# Slurps STEM.htm into $_, runs the substitutions below over the whole
# document in the order given, and writes the result to STEM.html.
#
# Provenance: file mkhtm2html-2 as added by commit
# 27d338dec0428bc22e2838eb8641c6e0d1681e22 (Jonas Smedegaard, 2013-05-10,
# "Include mk* scripts").
#
# FIXME(review): this copy was recovered from a rendered patch page.  Several
# patterns below start with "]*>" or stop inside an open "((?:(?!" group,
# and some replacements consist only of captures separated by blank lines,
# which suggests the literal markup inside them was lost in extraction.
# Those statements are reproduced exactly as found; the ones marked
# "truncated" will not compile until restored from the original commit.

use strict;
use warnings;

use Getopt::Long;
use File::Slurp;

my $usage = "Usage: $0 [--force|-f] STEM\n";

# Bind --force/-f to $force; the bare spec "force|f" left $force undefined.
# NOTE(review): nothing visible in this script reads $force yet.
my $force;
GetOptions( 'force|f' => \$force ) or die $usage;

# The input/output file stem is the first remaining argument.
my $stem = shift;
die $usage unless defined $stem;

# Every s/// below operates on the topic variable.  "my $_" was an
# experimental lexical topic that stopped compiling in Perl 5.24, so
# localize the global $_ instead.
local $_ = read_file( $stem . '.htm' );

# whitespace: collapse runs of horizontal whitespace to a single space
# NOTE(review): the first alternative is a plain space here, which \h already
# covers; it may originally have been an entity or non-breaking space.
s/(?: |\h)+/ /mg;

# preamble: delete everything through the last occurrence of the marker text
# and the whitespace that follows it
s{.*>HAVE ADOPTED THIS REGULATION:

\s*}{}s;

# page header
# FIXME(review): truncated pattern (unbalanced "(?:(?!").
s{]*;top:(?:1172|1187)px;[^>]*>(?:(?!\s*}{}mg;

# headline
# FIXME(review): truncated pattern (unbalanced "((?:(?!").
s{]*>(?:In Title \S+, the following Section \S+ is inserted:

\s*]*>)?\'?(SECTION \S+)
((?:(?!

}{

$1

\n

$2

}mg;
# FIXME(review): leading "]*>" looks like the tail of a lost tag pattern.
s{]*>\'?(Article \S+)

}{

$1

}mg;
# FIXME(review): truncated pattern (unbalanced "((?:(?!").
s{]*>(?:Article \S+ is replaced by the following:

\s*]*>)?\'?(Article \S+)
((?:(?!

}{

$1

\n

$2

}mg;
# FIXME(review): leading "]*>" looks like the tail of a lost tag pattern.
s{]*>(Article \S+) is amended as follows:

}{

$1

}mg;
# Replacement emits the paragraph number ($2) ahead of the captured text ($1).
# FIXME(review): "]*>" fragments look like the tails of lost tag patterns.
s{]*>(?:paragraph \S+ is replaced by the following:

\s*)(]*>)\'?(\d+)\. }{

$2

\n$1}mg;
# \2 requires the number after the intro text to equal the paragraph named
# in it.
# FIXME(review): "]*>" fragments look like the tails of lost tag patterns.
s{]*>In (Article \S+), paragraph (\S+) is replaced by the following:

\s*(]*>)\'?(\2)\. }{

$1

\n

$2

\n$3}mg;

# unwrap: join lines, replacing the break and surrounding whitespace with a
# single space
# NOTE(review): the literal newline in the pattern may stand in for lost
# markup.
s{\s*
\s*}{ }mg;

write_file( $stem . '.html', $_ );

print "DONE: $0 stem $stem\n";