summaryrefslogtreecommitdiff
path: root/mkhtm2html-2
blob: ae183917217c99693565615a17786352edd5db64 (plain)
  #!/usr/bin/perl
  # normalize HTML
  #
  # Usage: <script> [--force|-f] STEM
  #
  # Reads STEM.htm, drops the preamble and the page headers, rewrites the
  # section/article/paragraph announcements into <H1>/<H2>/<H3> headings,
  # unwraps <br/> line breaks and writes the result to STEM.html.
  use Getopt::Long;
  use File::Slurp;
  use strict;
  use warnings;

  # --force/-f is accepted on the command line, but nothing below consults
  # it: the output file is always (over)written.
  my $force;
  GetOptions( 'force|f' => \$force )
    or die "Usage: $0 [--force|-f] STEM\n";

  # STEM is the input/output file name without its extension.
  my $stem = shift;
  die "Usage: $0 [--force|-f] STEM\n"
    unless defined $stem && length $stem;

  # A named lexical is used instead of `my $_`: the lexical topic variable
  # was removed in Perl 5.24 and no longer compiles.
  my $html = read_file( $stem . '.htm' );

  # whitespace: collapse each run of horizontal whitespace into one space
  # (\h does not match newlines, so line structure is kept)
  $html =~ s/(?: |\h)+/ /mg;

  # preamble: remove everything up to and including the last paragraph that
  # ends in "HAVE ADOPTED THIS REGULATION:" (greedy .* under /s)
  $html =~ s{.*>HAVE ADOPTED THIS REGULATION:</P>\s*}{}s;

  # page header: remove <P> elements whose attributes contain
  # ";top:1172px;" or ";top:1187px;"
  $html =~ s{<P\b[^>]*;top:(?:1172|1187)px;[^>]*>(?:(?!</P\b).)+.</P>\s*}{}mg;

  # headline: "SECTION x<br/><b>title</b>" becomes <H1> + <H2>; an optional
  # preceding "In Title ..., the following Section ... is inserted:"
  # paragraph is swallowed
  $html =~ s{<P\b[^>]*>(?:In Title \S+, the following Section \S+ is inserted:</P>\s*<P\b[^>]*>)?\'?(SECTION \S+)<br/><b>((?:(?!</P\b).)+.)</b></P>}{<H1>$1</H1>\n<H2>$2</H2>}mg;

  # a paragraph holding only an italic "Article x" becomes <H1>
  $html =~ s{<P\b[^>]*><i>\'?(Article \S+)</i></P>}{<H1>$1</H1>}mg;

  # "Article x<br/><b>title</b>" becomes <H1> + <H2>; an optional preceding
  # "Article ... is replaced by the following:" paragraph is swallowed
  $html =~ s{<P\b[^>]*>(?:Article \S+ is replaced by the following:</P>\s*<P\b[^>]*>)?\'?(Article \S+)<br/><b>((?:(?!</P\b).)+.)</b></P>}{<H1>$1</H1>\n<H2>$2</H2>}mg;

  # "Article x is amended as follows:" becomes <H1>Article x</H1>
  $html =~ s{<P\b[^>]*>(Article \S+) is amended as follows:</P>}{<H1>$1</H1>}mg;

  # "paragraph n is replaced by the following:" followed by a paragraph that
  # starts with "n. ": emit <H3>n</H3> and keep the opening <P ...> tag
  # ($2 is the digits read from the following paragraph, $1 its <P> tag)
  $html =~ s{<P\b[^>]*>(?:paragraph \S+ is replaced by the following:</P>\s*)(<P\b[^>]*>)\'?(\d+)\. }{<H3>$2</H3>\n$1}mg;

  # "In Article x, paragraph n is replaced by the following:" followed by a
  # paragraph that starts with the same n (backreference \2): emit <H1> for
  # the article, <H3> for the paragraph, and keep the opening <P ...> tag
  $html =~ s{<P\b[^>]*>In (Article \S+), paragraph (\S+) is replaced by the following:</P>\s*(<P\b[^>]*>)\'?(\2)\. }{<H1>$1</H1>\n<H3>$2</H3>\n$3}mg;

  # unwrap: every remaining <br/> (with surrounding whitespace) becomes a
  # single space
  $html =~ s{\s*<br/>\s*}{ }mg;

  write_file( $stem . '.html', $html );
  print "DONE: $0 stem $stem\n";