diff options
author | Jonas Smedegaard <dr@jones.dk> | 2013-05-10 20:46:04 +0200 |
---|---|---|
committer | Jonas Smedegaard <dr@jones.dk> | 2013-05-10 20:46:04 +0200 |
commit | 27d338dec0428bc22e2838eb8641c6e0d1681e22 (patch) | |
tree | 18eeb76e189ce03838dd9b23194f1c707b1eabbb /mkhtm2html-2 |
Include mk* scripts
Diffstat (limited to 'mkhtm2html-2')
-rwxr-xr-x | mkhtm2html-2 | 39 |
1 files changed, 39 insertions, 0 deletions
#!/usr/bin/perl

# mkhtm2html-2 - normalize HTML
#
# Usage: mkhtm2html-2 [--force|-f] STEM
#
# Reads STEM.htm, applies the clean-up substitutions below in order, and
# writes the result to STEM.html.

use strict;
use warnings;

use Getopt::Long;
use File::Slurp;

my $usage = "Usage: $0 [--force|-f] STEM\n";

# --force/-f is accepted on the command line, but nothing below consults it.
my $force;
GetOptions( 'force|f' => \$force ) or die $usage;

my $stem = shift;
die $usage unless defined $stem && length $stem;

# The whole document is held in a named lexical: "my $_" (lexical topic)
# was removed from Perl in 5.24 and no longer compiles.
my $html = read_file( $stem . '.htm' );

# whitespace: collapse each run of horizontal whitespace to a single space.
# NOTE(review): the first alternative is written as the UTF-8 byte sequence
# of a no-break space, on the assumption that the invisible character in the
# original pattern was a literal NBSP - confirm. (A plain space is already
# covered by \h.)
$html =~ s/(?:\xC2\xA0|\h)+/ /g;

# preamble: drop everything up to and including the last
# "HAVE ADOPTED THIS REGULATION:" paragraph end (greedy .* with /s).
$html =~ s{.*>HAVE ADOPTED THIS REGULATION:</P>\s*}{}s;

# page header: drop single-line paragraphs whose attributes contain
# ";top:1172px;" or ";top:1187px;".
# NOTE(review): presumably those offsets are where running page headers sit
# in the converted document - confirm against the input.
$html =~ s{<P\b[^>]*;top:(?:1172|1187)px;[^>]*>(?:(?!</P\b).)+.</P>\s*}{}g;

# headline: turn section/article/paragraph lead-ins into H1/H2/H3 elements.
# The order of these substitutions is kept exactly as in the original.

# "SECTION n<br/><b>title</b>", optionally preceded by an "In Title ..., the
# following Section ... is inserted:" paragraph.
$html =~ s{<P\b[^>]*>(?:In Title \S+, the following Section \S+ is inserted:</P>\s*<P\b[^>]*>)?\'?(SECTION \S+)<br/><b>((?:(?!</P\b).)+.)</b></P>}{<H1>$1</H1>\n<H2>$2</H2>}g;

# Italic "Article n" on its own.
$html =~ s{<P\b[^>]*><i>\'?(Article \S+)</i></P>}{<H1>$1</H1>}g;

# "Article n<br/><b>title</b>", optionally preceded by an "Article ... is
# replaced by the following:" paragraph.
$html =~ s{<P\b[^>]*>(?:Article \S+ is replaced by the following:</P>\s*<P\b[^>]*>)?\'?(Article \S+)<br/><b>((?:(?!</P\b).)+.)</b></P>}{<H1>$1</H1>\n<H2>$2</H2>}g;

# "Article n is amended as follows:"
$html =~ s{<P\b[^>]*>(Article \S+) is amended as follows:</P>}{<H1>$1</H1>}g;

# "paragraph n is replaced by the following:" followed by a paragraph that
# starts with "N. ": the number becomes an H3 and the "N. " prefix is dropped.
$html =~ s{<P\b[^>]*>(?:paragraph \S+ is replaced by the following:</P>\s*)(<P\b[^>]*>)\'?(\d+)\. }{<H3>$2</H3>\n$1}g;

# "In Article n, paragraph m is replaced by the following:" followed by a
# paragraph starting with the same "m. " (enforced by the \2 backreference).
$html =~ s{<P\b[^>]*>In (Article \S+), paragraph (\S+) is replaced by the following:</P>\s*(<P\b[^>]*>)\'?(\2)\. }{<H1>$1</H1>\n<H3>$2</H3>\n$3}g;

# unwrap: replace each <br/> and its surrounding whitespace with one space.
$html =~ s{\s*<br/>\s*}{ }g;

write_file( $stem . '.html', $html );

print "DONE: $0 stem $stem\n";