#!/usr/bin/perl
# normalize HTML
use Getopt::Long;
use File::Slurp;
use strict;
use warnings;
# Command-line handling: an optional --force/-f flag followed by the file
# stem.  The document to normalize is read from "<stem>.htm".
my $force;

# Bind the flag to $force.  Without an explicit destination Getopt::Long
# stores the value in the package variable $opt_force, leaving $force unset.
GetOptions( 'force|f' => \$force )
    or die "Usage: $0 [--force|-f] STEM\n";

my $stem = shift;
die "Usage: $0 [--force|-f] STEM\n"
    unless defined $stem && length $stem;

# "my $_" was an experimental feature and is a compile-time error since
# Perl 5.24.  Localize the global $_ instead, so the bare s/// statements
# that follow keep operating on the slurped text.  The scalar assignment
# makes read_file return the whole file as a single string.
local $_ = read_file( $stem . '.htm' );
# Substitution pipeline.  Every statement below edits $_ in place, and the
# steps depend on running in this order.
# NOTE(review): several patterns start with "]*>" or end in an unclosed
# "(?:(?!" -- it looks as if markup fragments were lost from this copy of
# the script.  Compare with the upstream version before relying on them.

# whitespace: collapse each run of space-like characters (the literal
# alternative below, or any \h horizontal whitespace) into one plain space
s/(?: |\h)+/ /mg;
# preamble: drop everything up to and including the line ending in
# ">HAVE ADOPTED THIS REGULATION:" plus the whitespace after it.  /s lets
# "." cross newlines; the greedy ".*" means the last such line is the cut.
s{.*>HAVE ADOPTED THIS REGULATION:
\s*}{}s;
# page header: delete fragments whose attributes contain ";top:1172px;" or
# ";top:1187px;" -- presumably the positioned page headers; TODO confirm
s{]*;top:(?:1172|1187)px;[^>]*>(?:(?!
\s*}{}mg;
# headline: a "SECTION x" heading, optionally preceded by an
# "In Title ..., the following Section ... is inserted:" lead-in, is reduced
# to the captured heading followed by the second captured group
s{]*>(?:In Title \S+, the following Section \S+ is inserted:
\s*]*>)?\'?(SECTION \S+)
((?:(?!
}{$1
\n$2
}mg;
# a line ending in "Article x" (optionally preceded by a single quote) is
# reduced to just "Article x"
s{]*>\'?(Article \S+)
}{$1
}mg;
# an "Article x" heading, optionally preceded by an
# "Article ... is replaced by the following:" lead-in, is reduced to the
# captured heading followed by the second captured group
s{]*>(?:Article \S+ is replaced by the following:
\s*]*>)?\'?(Article \S+)
((?:(?!
}{$1
\n$2
}mg;
# "Article x is amended as follows:" keeps only "Article x"
s{]*>(Article \S+) is amended as follows:
}{$1
}mg;
# "paragraph x is replaced by the following:" followed by a numbered item
# ("N. "): the number is emitted first, then the fragment captured before it
s{]*>(?:paragraph \S+ is replaced by the following:
\s*)(]*>)\'?(\d+)\. }{
$2
\n$1}mg;
# "In Article x, paragraph y is replaced by the following:" followed by an
# item numbered y (the \2 backreference requires the same number): emits the
# article, then the paragraph number, then the fragment captured before the
# item number
s{]*>In (Article \S+), paragraph (\S+) is replaced by the following:
\s*(]*>)\'?(\2)\. }{
$1
\n$2
\n$3}mg;
# unwrap: as written here, every newline together with its surrounding
# whitespace becomes a single space, joining wrapped lines
s{\s*
\s*}{ }mg;
# Store the normalized text beside the input as "<stem>.html", then report
# which script and stem were processed.
my $target = $stem . '.html';
write_file( $target, $_ );
print "DONE: $0 stem $stem\n";
# (Stray repository web-page text, not part of the script:) Unnamed repository; edit this file 'description' to name the repository. Jonas Smedegaard |