#!/usr/bin/perl # normalize HTML use Getopt::Long; use File::Slurp; use strict; use warnings; my $force; GetOptions ("force|f"); my $stem = shift; my $_ = read_file( $stem . '.htm' ); # whitespace s/(?: |\h)+/ /mg; # preamble s{.*>HAVE ADOPTED THIS REGULATION:

\s*}{}s; # page header s{]*;top:(?:1172|1187)px;[^>]*>(?:(?!\s*}{}mg; # headline s{]*>(?:In Title \S+, the following Section \S+ is inserted:

\s*]*>)?\'?(SECTION \S+)
((?:(?!
}{
$1
\n
$2
}mg; s{]*>\'?(Article \S+)
}{
$1
}mg; s{]*>(?:Article \S+ is replaced by the following:
\s*]*>)?\'?(Article \S+)
((?:(?!
}{
$1
\n
$2
}mg; s{]*>(Article \S+) is amended as follows:
}{
$1
}mg; s{]*>(?:paragraph \S+ is replaced by the following:
\s*)(]*>)\'?(\d+)\. }{
$2
\n$1}mg; s{]*>In (Article \S+), paragraph (\S+) is replaced by the following:
\s*(]*>)\'?(\2)\. }{
$1
\n
$2
\n$3}mg; # unwrap s{\s*
\s*}{ }mg; write_file( $stem . '.html', $_ ); print "DONE: $0 stem $stem\n";
# ---------------------------------------------------------------------------
# Reviewer notes (comments only; no code above was changed).
#
# Purpose, as far as the visible text shows: read "<stem>.htm" with
# read_file, apply a series of s{...}{...} substitutions labelled
# whitespace / preamble / page header / headline / unwrap, write the result
# to "<stem>.html" with write_file, and print a "DONE" line.
#
# NOTE(review): this copy looks damaged and is presumably not the file as
# authored -- confirm against version control before editing further:
#   * Every statement up to the first pattern sits on the "#!" line, so
#     perl reads all of it as one comment.
#   * Several patterns open with "]*>" and others stop at "(?:(?!" with no
#     closing parenthesis, which suggests the markup literals inside the
#     regexes and replacements were stripped. They cannot be rebuilt from
#     what is shown here, so no rewrite is attempted.
#
# Points to revisit once an intact copy is available:
#   * "my $_" -- lexical $_ was experimental and was removed in perl 5.24;
#     a named variable bound with =~ is the portable form.
#   * GetOptions("force|f") is given no destination, and $force is not
#     read anywhere in the visible code -- TODO confirm the option is
#     meant to do something.
#   * $stem comes from shift with no check that an argument was supplied.
# ---------------------------------------------------------------------------