#!/usr/bin/perl
# normalize HTML
#
# Usage: <script> [--force|-f] STEM
#
# Reads STEM.htm, applies a fixed series of substitutions that turn
# positioned <p> elements of a legal text (sections, articles, amended
# paragraphs) into headings, and writes the result to STEM.html.
#
# NOTE(review): the copy of this script that was reviewed had every HTML tag
# stripped out of its regex literals and all statements collapsed onto the
# shebang line.  The markup below is a reconstruction: "<p[^>]*>", "</p>" and
# "<br/>" are inferred from the residue that was left behind; the heading
# element names used in the replacements (h1..h5) and the non-breaking-space
# entity are placeholders.  Compare against a pristine copy before relying
# on the exact output.
use Getopt::Long;
use File::Slurp;
use strict;
use warnings;

# --force is accepted for command-line compatibility; nothing in this script
# consults it.  Binding it to $force (instead of giving GetOptions no
# destination) keeps the declared variable and the option in sync.
my $force;
GetOptions( "force|f" => \$force )
    or die "usage: $0 [--force] stem\n";

my $stem = shift;
die "usage: $0 [--force] stem\n"
    unless defined $stem && length $stem;

# A named lexical replaces "my \$_", which no longer compiles on perl >= 5.24.
my $html = read_file( $stem . '.htm' );

# whitespace: squeeze runs of horizontal whitespace / non-breaking-space
# entities to a single blank.
# NOTE(review): the entity spelling was lost; both common forms are accepted.
$html =~ s/(?:&nbsp;|&#160;|\h)+/ /mg;

# preamble: drop everything up to and including the enacting formula.
$html =~ s{.*>HAVE ADOPTED THIS REGULATION:</p>\s*}{}s;

# page header: remove paragraphs positioned at the header's vertical offsets.
$html =~ s{<p[^>]*;top:(?:1172|1187)px;[^>]*>(?:(?!</p).)*</p>\s*}{}mg;

# headline: "SECTION n<br/>title", optionally preceded by its insertion
# instruction, becomes a number heading followed by a title heading.
$html =~ s{<p[^>]*>(?:In Title \S+, the following Section \S+ is inserted:</p>\s*<p[^>]*>)?\'?(SECTION \S+)<br/>((?:(?!</p).)*)</p>}{<h1>$1</h1>\n<h2>$2</h2>}mg;

# An article number alone in its paragraph.
$html =~ s{<p[^>]*>\'?(Article \S+)</p>}{<h3>$1</h3>}mg;

# "Article n<br/>title", optionally preceded by its replacement instruction.
$html =~ s{<p[^>]*>(?:Article \S+ is replaced by the following:</p>\s*<p[^>]*>)?\'?(Article \S+)<br/>((?:(?!</p).)*)</p>}{<h3>$1</h3>\n<h4>$2</h4>}mg;

# Amendment instruction: keep only the article reference.
$html =~ s{<p[^>]*>(Article \S+) is amended as follows:</p>}{<h3>$1</h3>}mg;

# Replaced paragraph: the number that opens the new text becomes a heading;
# the opening tag of the text paragraph ($1) is kept.
$html =~ s{<p[^>]*>(?:paragraph \S+ is replaced by the following:</p>\s*)(<p[^>]*>)\'?(\d+)\. }{<h5>$2</h5>\n$1}mg;

# Same, with the article named in the instruction; \2 requires the new text
# to start with the paragraph number quoted in the instruction.
$html =~ s{<p[^>]*>In (Article \S+), paragraph (\S+) is replaced by the following:</p>\s*(<p[^>]*>)\'?(\2)\. }{<h3>$1</h3>\n<h5>$2</h5>\n$3}mg;

# unwrap: join lines that were broken inside a paragraph.
$html =~ s{\s*<br/>\s*}{ }mg;

write_file( $stem . '.html', $html );
print "DONE: $0 stem $stem\n";